4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # Modifiable default values; need to define these here before the
60 """Returns an empty list.
67 """Returns an empty dict.
75 """Checks if the given value is not None.
78 return val is not None
82 """Checks if the given value is None.
89 """Checks if the given value is a boolean.
92 return isinstance(val, bool)
96 """Checks if the given value is an integer.
99 return isinstance(val, int)
103 """Checks if the given value is a float.
106 return isinstance(val, float)
110 """Checks if the given value is a string.
113 return isinstance(val, basestring)
117 """Checks if a given value evaluates to a boolean True value.
123 def _TElemOf(target_list):
124 """Builds a function that checks if a given value is a member of a list.
127 return lambda val: val in target_list
132 """Checks if the given value is a list.
135 return isinstance(val, list)
139 """Checks if the given value is a dictionary.
142 return isinstance(val, dict)
147 """Combine multiple functions using an AND operation.
151 return compat.all(t(val) for t in args)
156 """Combine multiple functions using an AND operation.
160 return compat.any(t(val) for t in args)
167 _TNonEmptyString = _TAnd(_TString, _TTrue)
171 _TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)
174 def _TListOf(my_type):
175 """Checks if a given value is a list with all elements of the same type.
179 lambda lst: compat.all(my_type(v) for v in lst))
182 def _TDictOf(key_type, val_type):
183 """Checks a dict type for the type of its key/values.
187 lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
188 and compat.all(val_type(v)
189 for v in my_dict.values())))
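# Illustrative sketch (not part of the original checks): the combinators
# above compose into argument checks for opcode parameters. For example, a
# hypothetical check for "a dict mapping non-empty strings to lists of
# non-negative integers" could be built and used as:
#
#   _TExampleMap = _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
#   _TExampleMap({"sda": [0, 128]})   # -> True
#   _TExampleMap({"": [0, 128]})      # -> False (empty key rejected)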
193 class LogicalUnit(object):
194 """Logical Unit base class.
196 Subclasses must follow these rules:
197 - implement ExpandNames
198 - implement CheckPrereq (except when tasklets are used)
199 - implement Exec (except when tasklets are used)
200 - implement BuildHooksEnv
201 - redefine HPATH and HTYPE
202 - optionally redefine their run requirements:
203 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
205 Note that all commands require root permissions.
207 @ivar dry_run_result: the value (if any) that will be returned to the caller
208 in dry-run mode (signalled by opcode dry_run parameter)
209 @cvar _OP_DEFS: a list of opcode attributes and the default values
210 they should get if not already existing
219 def __init__(self, processor, op, context, rpc):
220 """Constructor for LogicalUnit.
222 This needs to be overridden in derived classes in order to check op
226 self.proc = processor
228 self.cfg = context.cfg
229 self.context = context
231 # Dicts used to declare locking needs to mcpu
232 self.needed_locks = None
233 self.acquired_locks = {}
234 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
236 self.remove_locks = {}
237 # Used to force good behavior when calling helper functions
238 self.recalculate_locks = {}
241 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
242 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
243 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
244 # support for dry-run
245 self.dry_run_result = None
246 # support for generic debug attribute
247 if (not hasattr(self.op, "debug_level") or
248 not isinstance(self.op.debug_level, int)):
249 self.op.debug_level = 0
254 for aname, aval in self._OP_DEFS:
255 if not hasattr(self.op, aname):
260 setattr(self.op, aname, dval)
262 for attr_name, test in self._OP_REQP:
263 if not hasattr(op, attr_name):
264 raise errors.OpPrereqError("Required parameter '%s' missing" %
265 attr_name, errors.ECODE_INVAL)
266 attr_val = getattr(op, attr_name, None)
267 if not callable(test):
268 raise errors.ProgrammerError("Validation for parameter '%s' failed:"
269 " the given check is not callable (%s)" %
271 if not test(attr_val):
272 logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
273 self.op.OP_ID, attr_name, type(attr_val), attr_val)
274 raise errors.OpPrereqError("Parameter '%s' has invalid type" %
275 attr_name, errors.ECODE_INVAL)
277 self.CheckArguments()
280 """Returns the SshRunner object
284 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
287 ssh = property(fget=__GetSSH)
289 def CheckArguments(self):
290 """Check syntactic validity for the opcode arguments.
292 This method is for doing a simple syntactic check and ensure
293 validity of opcode parameters, without any cluster-related
294 checks. While the same can be accomplished in ExpandNames and/or
295 CheckPrereq, doing these separately is better because:
297 - ExpandNames is left as purely a lock-related function
298 - CheckPrereq is run after we have acquired locks (and possible
301 The function is allowed to change the self.op attribute so that
302 later methods can no longer worry about missing parameters.
307 def ExpandNames(self):
308 """Expand names for this LU.
310 This method is called before starting to execute the opcode, and it should
311 update all the parameters of the opcode to their canonical form (e.g. a
312 short node name must be fully expanded after this method has successfully
313 completed). This way locking, hooks, logging, etc. can work correctly.
315 LUs which implement this method must also populate the self.needed_locks
316 member, as a dict with lock levels as keys, and a list of needed lock names
319 - use an empty dict if you don't need any lock
320 - if you don't need any lock at a particular level omit that level
321 - don't put anything for the BGL level
322 - if you want all locks at a level use locking.ALL_SET as a value
324 If you need to share locks (rather than acquire them exclusively) at one
325 level you can modify self.share_locks, setting a true value (usually 1) for
326 that level. By default locks are not shared.
328 This function can also define a list of tasklets, which then will be
329 executed in order instead of the usual LU-level CheckPrereq and Exec
330 functions, if those are not defined by the LU.
334 # Acquire all nodes and one instance
335 self.needed_locks = {
336 locking.LEVEL_NODE: locking.ALL_SET,
337 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
339 # Acquire just two nodes
340 self.needed_locks = {
341 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
344 self.needed_locks = {} # No, you can't leave it to the default value None
347 # The implementation of this method is mandatory only if the new LU is
348 # concurrent, so that old LUs don't need to be changed all at the same
351 self.needed_locks = {} # Exclusive LUs don't need locks.
353 raise NotImplementedError
355 def DeclareLocks(self, level):
356 """Declare LU locking needs for a level
358 While most LUs can just declare their locking needs at ExpandNames time,
359 sometimes there's the need to calculate some locks after having acquired
360 the ones before. This function is called just before acquiring locks at a
361 particular level, but after acquiring the ones at lower levels, and permits
362 such calculations. It can be used to modify self.needed_locks, and by
363 default it does nothing.
365 This function is only called if you have something already set in
366 self.needed_locks for the level.
368 @param level: Locking level which is going to be locked
369 @type level: member of ganeti.locking.LEVELS
373 def CheckPrereq(self):
374 """Check prerequisites for this LU.
376 This method should check that the prerequisites for the execution
377 of this LU are fulfilled. It can do internode communication, but
378 it should be idempotent - no cluster or system changes are
381 The method should raise errors.OpPrereqError in case something is
382 not fulfilled. Its return value is ignored.
384 This method should also update all the parameters of the opcode to
385 their canonical form if it hasn't been done by ExpandNames before.
388 if self.tasklets is not None:
389 for (idx, tl) in enumerate(self.tasklets):
390 logging.debug("Checking prerequisites for tasklet %s/%s",
391 idx + 1, len(self.tasklets))
396 def Exec(self, feedback_fn):
399 This method should implement the actual work. It should raise
400 errors.OpExecError for failures that are somewhat dealt with in
404 if self.tasklets is not None:
405 for (idx, tl) in enumerate(self.tasklets):
406 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
409 raise NotImplementedError
411 def BuildHooksEnv(self):
412 """Build hooks environment for this LU.
414 This method should return a three-element tuple consisting of: a dict
415 containing the environment that will be used for running the
416 specific hook for this LU, a list of node names on which the hook
417 should run before the execution, and a list of node names on which
418 the hook should run after the execution.
420 The keys of the dict must not have 'GANETI_' prefixed as this will
421 be handled in the hooks runner. Also note additional keys will be
422 added by the hooks runner. If the LU doesn't define any
423 environment, an empty dict (and not None) should be returned.
425 No nodes should be returned as an empty list (and not None).
427 Note that if the HPATH for a LU class is None, this function will
431 raise NotImplementedError
433 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
434 """Notify the LU about the results of its hooks.
436 This method is called every time a hooks phase is executed, and notifies
437 the Logical Unit about the hooks' result. The LU can then use it to alter
438 its result based on the hooks. By default the method does nothing and the
439 previous result is passed back unchanged but any LU can define it if it
440 wants to use the local cluster hook-scripts somehow.
442 @param phase: one of L{constants.HOOKS_PHASE_POST} or
443 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
444 @param hook_results: the results of the multi-node hooks rpc call
445 @param feedback_fn: function used to send feedback back to the caller
446 @param lu_result: the previous Exec result this LU had, or None
448 @return: the new Exec result, based on the previous result
452 # API must be kept, thus we ignore the unused-argument and
453 # could-be-a-function warnings
454 # pylint: disable-msg=W0613,R0201
457 def _ExpandAndLockInstance(self):
458 """Helper function to expand and lock an instance.
460 Many LUs that work on an instance take its name in self.op.instance_name
461 and need to expand it and then declare the expanded name for locking. This
462 function does it, and then updates self.op.instance_name to the expanded
463 name. It also initializes needed_locks as a dict, if this hasn't been done
467 if self.needed_locks is None:
468 self.needed_locks = {}
470 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
471 "_ExpandAndLockInstance called with instance-level locks set"
472 self.op.instance_name = _ExpandInstanceName(self.cfg,
473 self.op.instance_name)
474 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
476 def _LockInstancesNodes(self, primary_only=False):
477 """Helper function to declare instances' nodes for locking.
479 This function should be called after locking one or more instances to lock
480 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
481 with all primary or secondary nodes for instances already locked and
482 present in self.needed_locks[locking.LEVEL_INSTANCE].
484 It should be called from DeclareLocks, and for safety only works if
485 self.recalculate_locks[locking.LEVEL_NODE] is set.
487 In the future it may grow parameters to just lock some instance's nodes, or
488 to just lock primaries or secondary nodes, if needed.
490 It should be called in DeclareLocks in a way similar to::
492 if level == locking.LEVEL_NODE:
493 self._LockInstancesNodes()
495 @type primary_only: boolean
496 @param primary_only: only lock primary nodes of locked instances
499 assert locking.LEVEL_NODE in self.recalculate_locks, \
500 "_LockInstancesNodes helper function called with no nodes to recalculate"
502 # TODO: check if we've really been called with the instance locks held
504 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
505 # future we might want to have different behaviors depending on the value
506 # of self.recalculate_locks[locking.LEVEL_NODE]
508 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
509 instance = self.context.cfg.GetInstanceInfo(instance_name)
510 wanted_nodes.append(instance.primary_node)
512 wanted_nodes.extend(instance.secondary_nodes)
514 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
515 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
516 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
517 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
519 del self.recalculate_locks[locking.LEVEL_NODE]
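# Illustrative sketch (hypothetical LU, not from this class): an
# instance-level LU typically pairs this helper with ExpandNames like so:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()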
522 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
523 """Simple LU which runs no hooks.
525 This LU is intended as a parent for other LogicalUnits which will
526 run no hooks, in order to reduce duplicate code.
532 def BuildHooksEnv(self):
533 """Empty BuildHooksEnv for NoHooksLu.
535 This just raises an error.
538 assert False, "BuildHooksEnv called for NoHooksLUs"
542 """Tasklet base class.
544 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
545 they can mix legacy code with tasklets. Locking needs to be done in the LU,
546 tasklets know nothing about locks.
548 Subclasses must follow these rules:
549 - Implement CheckPrereq
553 def __init__(self, lu):
560 def CheckPrereq(self):
561 """Check prerequisites for this tasklets.
563 This method should check whether the prerequisites for the execution of
564 this tasklet are fulfilled. It can do internode communication, but it
565 should be idempotent - no cluster or system changes are allowed.
567 The method should raise errors.OpPrereqError in case something is not
568 fulfilled. Its return value is ignored.
570 This method should also update all parameters to their canonical form if it
571 hasn't been done before.
576 def Exec(self, feedback_fn):
577 """Execute the tasklet.
579 This method should implement the actual work. It should raise
580 errors.OpExecError for failures that are somewhat dealt with in code, or
584 raise NotImplementedError
587 def _GetWantedNodes(lu, nodes):
588 """Returns list of checked and expanded node names.
590 @type lu: L{LogicalUnit}
591 @param lu: the logical unit on whose behalf we execute
593 @param nodes: list of node names or None for all nodes
595 @return: the list of nodes, sorted
596 @raise errors.ProgrammerError: if the nodes parameter is wrong type
600 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
601 " non-empty list of nodes whose name is to be expanded.")
603 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
604 return utils.NiceSort(wanted)
607 def _GetWantedInstances(lu, instances):
608 """Returns list of checked and expanded instance names.
610 @type lu: L{LogicalUnit}
611 @param lu: the logical unit on whose behalf we execute
612 @type instances: list
613 @param instances: list of instance names or None for all instances
615 @return: the list of instances, sorted
616 @raise errors.OpPrereqError: if the instances parameter is wrong type
617 @raise errors.OpPrereqError: if any of the passed instances is not found
621 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
623 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
627 def _GetUpdatedParams(old_params, update_dict,
628 use_default=True, use_none=False):
629 """Return the new version of a parameter dictionary.
631 @type old_params: dict
632 @param old_params: old parameters
633 @type update_dict: dict
634 @param update_dict: dict containing new parameter values, or
635 constants.VALUE_DEFAULT to reset the parameter to its default
637 @type use_default: boolean
638 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
639 values as 'to be deleted' values
640 @type use_none: boolean
641 @param use_none: whether to recognise C{None} values as 'to be
644 @return: the new parameter dictionary
647 params_copy = copy.deepcopy(old_params)
648 for key, val in update_dict.iteritems():
649 if ((use_default and val == constants.VALUE_DEFAULT) or
650 (use_none and val is None)):
656 params_copy[key] = val
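# Illustrative example (behaviour sketch, values are hypothetical): merging
# update_dict={"vcpus": 2, "memory": constants.VALUE_DEFAULT} into
# old_params={"vcpus": 1, "memory": 128} with use_default=True yields
# {"vcpus": 2}: "vcpus" is overridden, "memory" is dropped so the cluster
# default applies again, and keys absent from update_dict are kept as-is.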
660 def _CheckOutputFields(static, dynamic, selected):
661 """Checks whether all selected fields are valid.
663 @type static: L{utils.FieldSet}
664 @param static: static fields set
665 @type dynamic: L{utils.FieldSet}
666 @param dynamic: dynamic fields set
673 delta = f.NonMatching(selected)
675 raise errors.OpPrereqError("Unknown output fields selected: %s"
676 % ",".join(delta), errors.ECODE_INVAL)
679 def _CheckBooleanOpField(op, name):
680 """Validates boolean opcode parameters.
682 This will ensure that an opcode parameter is either a boolean value,
683 or None (but that it always exists).
686 val = getattr(op, name, None)
687 if not (val is None or isinstance(val, bool)):
688 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
689 (name, str(val)), errors.ECODE_INVAL)
690 setattr(op, name, val)
693 def _CheckGlobalHvParams(params):
694 """Validates that given hypervisor params are not global ones.
696 This will ensure that instances don't get customised versions of
700 used_globals = constants.HVC_GLOBALS.intersection(params)
702 msg = ("The following hypervisor parameters are global and cannot"
703 " be customized at instance level, please modify them at"
704 " cluster level: %s" % utils.CommaJoin(used_globals))
705 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
708 def _CheckNodeOnline(lu, node):
709 """Ensure that a given node is online.
711 @param lu: the LU on behalf of which we make the check
712 @param node: the node to check
713 @raise errors.OpPrereqError: if the node is offline
716 if lu.cfg.GetNodeInfo(node).offline:
717 raise errors.OpPrereqError("Can't use offline node %s" % node,
721 def _CheckNodeNotDrained(lu, node):
722 """Ensure that a given node is not drained.
724 @param lu: the LU on behalf of which we make the check
725 @param node: the node to check
726 @raise errors.OpPrereqError: if the node is drained
729 if lu.cfg.GetNodeInfo(node).drained:
730 raise errors.OpPrereqError("Can't use drained node %s" % node,
734 def _CheckNodeHasOS(lu, node, os_name, force_variant):
735 """Ensure that a node supports a given OS.
737 @param lu: the LU on behalf of which we make the check
738 @param node: the node to check
739 @param os_name: the OS to query about
740 @param force_variant: whether to ignore variant errors
741 @raise errors.OpPrereqError: if the node is not supporting the OS
744 result = lu.rpc.call_os_get(node, os_name)
745 result.Raise("OS '%s' not in supported OS list for node %s" %
747 prereq=True, ecode=errors.ECODE_INVAL)
748 if not force_variant:
749 _CheckOSVariant(result.payload, os_name)
752 def _RequireFileStorage():
753 """Checks that file storage is enabled.
755 @raise errors.OpPrereqError: when file storage is disabled
758 if not constants.ENABLE_FILE_STORAGE:
759 raise errors.OpPrereqError("File storage disabled at configure time",
763 def _CheckDiskTemplate(template):
764 """Ensure a given disk template is valid.
767 if template not in constants.DISK_TEMPLATES:
768 msg = ("Invalid disk template name '%s', valid templates are: %s" %
769 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
770 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
771 if template == constants.DT_FILE:
772 _RequireFileStorage()
775 def _CheckStorageType(storage_type):
776 """Ensure a given storage type is valid.
779 if storage_type not in constants.VALID_STORAGE_TYPES:
780 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
782 if storage_type == constants.ST_FILE:
783 _RequireFileStorage()
787 def _GetClusterDomainSecret():
788 """Reads the cluster domain secret.
791 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
795 def _CheckInstanceDown(lu, instance, reason):
796 """Ensure that an instance is not running."""
797 if instance.admin_up:
798 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
799 (instance.name, reason), errors.ECODE_STATE)
801 pnode = instance.primary_node
802 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
803 ins_l.Raise("Can't contact node %s for instance information" % pnode,
804 prereq=True, ecode=errors.ECODE_ENVIRON)
806 if instance.name in ins_l.payload:
807 raise errors.OpPrereqError("Instance %s is running, %s" %
808 (instance.name, reason), errors.ECODE_STATE)
811 def _ExpandItemName(fn, name, kind):
812 """Expand an item name.
814 @param fn: the function to use for expansion
815 @param name: requested item name
816 @param kind: text description ('Node' or 'Instance')
817 @return: the resolved (full) name
818 @raise errors.OpPrereqError: if the item is not found
822 if full_name is None:
823 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
828 def _ExpandNodeName(cfg, name):
829 """Wrapper over L{_ExpandItemName} for nodes."""
830 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
833 def _ExpandInstanceName(cfg, name):
834 """Wrapper over L{_ExpandItemName} for instance."""
835 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
838 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
839 memory, vcpus, nics, disk_template, disks,
840 bep, hvp, hypervisor_name):
841 """Builds instance related env variables for hooks
843 This builds the hook environment from individual variables.
846 @param name: the name of the instance
847 @type primary_node: string
848 @param primary_node: the name of the instance's primary node
849 @type secondary_nodes: list
850 @param secondary_nodes: list of secondary nodes as strings
851 @type os_type: string
852 @param os_type: the name of the instance's OS
853 @type status: boolean
854 @param status: the should_run status of the instance
856 @param memory: the memory size of the instance
858 @param vcpus: the count of VCPUs the instance has
860 @param nics: list of tuples (ip, mac, mode, link) representing
861 the NICs the instance has
862 @type disk_template: string
863 @param disk_template: the disk template of the instance
865 @param disks: the list of (size, mode) pairs
867 @param bep: the backend parameters for the instance
869 @param hvp: the hypervisor parameters for the instance
870 @type hypervisor_name: string
871 @param hypervisor_name: the hypervisor for the instance
873 @return: the hook environment for this instance
882 "INSTANCE_NAME": name,
883 "INSTANCE_PRIMARY": primary_node,
884 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
885 "INSTANCE_OS_TYPE": os_type,
886 "INSTANCE_STATUS": str_status,
887 "INSTANCE_MEMORY": memory,
888 "INSTANCE_VCPUS": vcpus,
889 "INSTANCE_DISK_TEMPLATE": disk_template,
890 "INSTANCE_HYPERVISOR": hypervisor_name,
894 nic_count = len(nics)
895 for idx, (ip, mac, mode, link) in enumerate(nics):
898 env["INSTANCE_NIC%d_IP" % idx] = ip
899 env["INSTANCE_NIC%d_MAC" % idx] = mac
900 env["INSTANCE_NIC%d_MODE" % idx] = mode
901 env["INSTANCE_NIC%d_LINK" % idx] = link
902 if mode == constants.NIC_MODE_BRIDGED:
903 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
907 env["INSTANCE_NIC_COUNT"] = nic_count
910 disk_count = len(disks)
911 for idx, (size, mode) in enumerate(disks):
912 env["INSTANCE_DISK%d_SIZE" % idx] = size
913 env["INSTANCE_DISK%d_MODE" % idx] = mode
917 env["INSTANCE_DISK_COUNT"] = disk_count
919 for source, kind in [(bep, "BE"), (hvp, "HV")]:
920 for key, value in source.items():
921 env["INSTANCE_%s_%s" % (kind, key)] = value
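# Illustrative example (hypothetical values): for a single-NIC, single-disk
# instance the resulting environment contains, among others, INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC,
# INSTANCE_NIC0_MODE, INSTANCE_DISK_COUNT=1 and INSTANCE_DISK0_SIZE; the
# hooks runner later prefixes every key with GANETI_.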
926 def _NICListToTuple(lu, nics):
927 """Build a list of nic information tuples.
929 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
930 value in LUQueryInstanceData.
932 @type lu: L{LogicalUnit}
933 @param lu: the logical unit on whose behalf we execute
934 @type nics: list of L{objects.NIC}
935 @param nics: list of nics to convert to hooks tuples
939 cluster = lu.cfg.GetClusterInfo()
943 filled_params = cluster.SimpleFillNIC(nic.nicparams)
944 mode = filled_params[constants.NIC_MODE]
945 link = filled_params[constants.NIC_LINK]
946 hooks_nics.append((ip, mac, mode, link))
950 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
951 """Builds instance related env variables for hooks from an object.
953 @type lu: L{LogicalUnit}
954 @param lu: the logical unit on whose behalf we execute
955 @type instance: L{objects.Instance}
956 @param instance: the instance for which we should build the
959 @param override: dictionary with key/values that will override
962 @return: the hook environment dictionary
965 cluster = lu.cfg.GetClusterInfo()
966 bep = cluster.FillBE(instance)
967 hvp = cluster.FillHV(instance)
969 'name': instance.name,
970 'primary_node': instance.primary_node,
971 'secondary_nodes': instance.secondary_nodes,
972 'os_type': instance.os,
973 'status': instance.admin_up,
974 'memory': bep[constants.BE_MEMORY],
975 'vcpus': bep[constants.BE_VCPUS],
976 'nics': _NICListToTuple(lu, instance.nics),
977 'disk_template': instance.disk_template,
978 'disks': [(disk.size, disk.mode) for disk in instance.disks],
981 'hypervisor_name': instance.hypervisor,
984 args.update(override)
985 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
988 def _AdjustCandidatePool(lu, exceptions):
989 """Adjust the candidate pool after node operations.
992 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
994 lu.LogInfo("Promoted nodes to master candidate role: %s",
995 utils.CommaJoin(node.name for node in mod_list))
996 for name in mod_list:
997 lu.context.ReaddNode(name)
998 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1000 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1004 def _DecideSelfPromotion(lu, exceptions=None):
1005 """Decide whether I should promote myself as a master candidate.
1008 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1009 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1010 # the new node will increase mc_max with one, so:
1011 mc_should = min(mc_should + 1, cp_size)
1012 return mc_now < mc_should
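# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=4 and mc_should=4, the wanted count becomes min(4 + 1, 10) = 5,
# so 4 < 5 and the new node promotes itself; once there are already
# cp_size candidates the comparison is false and no promotion happens.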
1015 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1016 """Check that the brigdes needed by a list of nics exist.
1019 cluster = lu.cfg.GetClusterInfo()
1020 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1021 brlist = [params[constants.NIC_LINK] for params in paramslist
1022 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1024 result = lu.rpc.call_bridges_exist(target_node, brlist)
1025 result.Raise("Error checking bridges on destination node '%s'" %
1026 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1029 def _CheckInstanceBridgesExist(lu, instance, node=None):
1030 """Check that the brigdes needed by an instance exist.
1034 node = instance.primary_node
1035 _CheckNicsBridgesExist(lu, instance.nics, node)
1038 def _CheckOSVariant(os_obj, name):
1039 """Check whether an OS name conforms to the os variants specification.
1041 @type os_obj: L{objects.OS}
1042 @param os_obj: OS object to check
1044 @param name: OS name passed by the user, to check for validity
1047 if not os_obj.supported_variants:
1050 variant = name.split("+", 1)[1]
1052 raise errors.OpPrereqError("OS name must include a variant",
1055 if variant not in os_obj.supported_variants:
1056 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
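# Illustrative example (hypothetical OS name): for an OS that declares
# supported_variants, a user-supplied name such as "debian-image+default"
# is split on the first '+' and "default" must be one of the supported
# variants; a bare "debian-image" without a '+' raises "OS name must
# include a variant".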
1059 def _GetNodeInstancesInner(cfg, fn):
1060 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1063 def _GetNodeInstances(cfg, node_name):
1064 """Returns a list of all primary and secondary instances on a node.
1068 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1071 def _GetNodePrimaryInstances(cfg, node_name):
1072 """Returns primary instances on a node.
1075 return _GetNodeInstancesInner(cfg,
1076 lambda inst: node_name == inst.primary_node)
1079 def _GetNodeSecondaryInstances(cfg, node_name):
1080 """Returns secondary instances on a node.
1083 return _GetNodeInstancesInner(cfg,
1084 lambda inst: node_name in inst.secondary_nodes)
1087 def _GetStorageTypeArgs(cfg, storage_type):
1088 """Returns the arguments for a storage type.
1091 # Special case for file storage
1092 if storage_type == constants.ST_FILE:
1093 # storage.FileStorage wants a list of storage directories
1094 return [[cfg.GetFileStorageDir()]]
1099 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1102 for dev in instance.disks:
1103 cfg.SetDiskID(dev, node_name)
1105 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1106 result.Raise("Failed to get disk status from node %s" % node_name,
1107 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1109 for idx, bdev_status in enumerate(result.payload):
1110 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1116 class LUPostInitCluster(LogicalUnit):
1117 """Logical unit for running hooks after cluster initialization.
1120 HPATH = "cluster-init"
1121 HTYPE = constants.HTYPE_CLUSTER
1124 def BuildHooksEnv(self):
1128 env = {"OP_TARGET": self.cfg.GetClusterName()}
1129 mn = self.cfg.GetMasterNode()
1130 return env, [], [mn]
1132 def Exec(self, feedback_fn):
1139 class LUDestroyCluster(LogicalUnit):
1140 """Logical unit for destroying the cluster.
1143 HPATH = "cluster-destroy"
1144 HTYPE = constants.HTYPE_CLUSTER
1147 def BuildHooksEnv(self):
1151 env = {"OP_TARGET": self.cfg.GetClusterName()}
1154 def CheckPrereq(self):
1155 """Check prerequisites.
1157 This checks whether the cluster is empty.
1159 Any errors are signaled by raising errors.OpPrereqError.
1162 master = self.cfg.GetMasterNode()
1164 nodelist = self.cfg.GetNodeList()
1165 if len(nodelist) != 1 or nodelist[0] != master:
1166 raise errors.OpPrereqError("There are still %d node(s) in"
1167 " this cluster." % (len(nodelist) - 1),
1169 instancelist = self.cfg.GetInstanceList()
1171 raise errors.OpPrereqError("There are still %d instance(s) in"
1172 " this cluster." % len(instancelist),
1175 def Exec(self, feedback_fn):
1176 """Destroys the cluster.
1179 master = self.cfg.GetMasterNode()
1180 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1182 # Run post hooks on master node before it's removed
1183 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1185 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1187 # pylint: disable-msg=W0702
1188 self.LogWarning("Errors occurred running hooks on %s" % master)
1190 result = self.rpc.call_node_stop_master(master, False)
1191 result.Raise("Could not disable the master role")
1193 if modify_ssh_setup:
1194 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1195 utils.CreateBackup(priv_key)
1196 utils.CreateBackup(pub_key)
1201 def _VerifyCertificate(filename):
1202 """Verifies a certificate for LUVerifyCluster.
1204 @type filename: string
1205 @param filename: Path to PEM file
1209 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1210 utils.ReadFile(filename))
1211 except Exception, err: # pylint: disable-msg=W0703
1212 return (LUVerifyCluster.ETYPE_ERROR,
1213 "Failed to load X509 certificate %s: %s" % (filename, err))
1216 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1217 constants.SSL_CERT_EXPIRATION_ERROR)
1220 fnamemsg = "While verifying %s: %s" % (filename, msg)
1225 return (None, fnamemsg)
1226 elif errcode == utils.CERT_WARNING:
1227 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1228 elif errcode == utils.CERT_ERROR:
1229 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1231 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1234 class LUVerifyCluster(LogicalUnit):
1235 """Verifies the cluster status.
1238 HPATH = "cluster-verify"
1239 HTYPE = constants.HTYPE_CLUSTER
1241 ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1242 ("verbose", _TBool),
1243 ("error_codes", _TBool),
1244 ("debug_simulate_errors", _TBool),
1248 TCLUSTER = "cluster"
1250 TINSTANCE = "instance"
1252 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1253 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1254 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1255 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1256 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1257 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1259 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1260 ENODEDRBD = (TNODE, "ENODEDRBD")
1261 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1262 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1263 ENODEHV = (TNODE, "ENODEHV")
1264 ENODELVM = (TNODE, "ENODELVM")
1265 ENODEN1 = (TNODE, "ENODEN1")
1266 ENODENET = (TNODE, "ENODENET")
1267 ENODEOS = (TNODE, "ENODEOS")
1268 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1269 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1270 ENODERPC = (TNODE, "ENODERPC")
1271 ENODESSH = (TNODE, "ENODESSH")
1272 ENODEVERSION = (TNODE, "ENODEVERSION")
1273 ENODESETUP = (TNODE, "ENODESETUP")
1274 ENODETIME = (TNODE, "ENODETIME")
1276 ETYPE_FIELD = "code"
1277 ETYPE_ERROR = "ERROR"
1278 ETYPE_WARNING = "WARNING"
1280 class NodeImage(object):
1281 """A class representing the logical and physical status of a node.
1284 @ivar name: the node name to which this object refers
1285 @ivar volumes: a structure as returned from
1286 L{ganeti.backend.GetVolumeList} (runtime)
1287 @ivar instances: a list of running instances (runtime)
1288 @ivar pinst: list of configured primary instances (config)
1289 @ivar sinst: list of configured secondary instances (config)
1290 @ivar sbp: dict of {secondary-node: list of instances} of all peers
1291 of this node (config)
1292 @ivar mfree: free memory, as reported by hypervisor (runtime)
1293 @ivar dfree: free disk, as reported by the node (runtime)
1294 @ivar offline: the offline status (config)
1295 @type rpc_fail: boolean
1296 @ivar rpc_fail: whether the RPC verify call failed (overall,
1297 not whether the individual keys were correct) (runtime)
1298 @type lvm_fail: boolean
1299 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1300 @type hyp_fail: boolean
1301 @ivar hyp_fail: whether the RPC call didn't return the instance list
1302 @type ghost: boolean
1303 @ivar ghost: whether this is a known node or not (config)
1304 @type os_fail: boolean
1305 @ivar os_fail: whether the RPC call didn't return valid OS data
1307 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1310 def __init__(self, offline=False, name=None):
1319 self.offline = offline
1320 self.rpc_fail = False
1321 self.lvm_fail = False
1322 self.hyp_fail = False
1324 self.os_fail = False
1327 def ExpandNames(self):
1328 self.needed_locks = {
1329 locking.LEVEL_NODE: locking.ALL_SET,
1330 locking.LEVEL_INSTANCE: locking.ALL_SET,
1332 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1334 def _Error(self, ecode, item, msg, *args, **kwargs):
1335 """Format an error message.
1337 Based on the opcode's error_codes parameter, either format a
1338 parseable error code, or a simpler error string.
1340 This must be called only from Exec and functions called from Exec.
1343 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1345 # first complete the msg
1348 # then format the whole message
1349 if self.op.error_codes:
1350 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1356 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1357 # and finally report it via the feedback_fn
1358 self._feedback_fn(" - %s" % msg)
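# Illustrative example (hypothetical node name): with error_codes enabled
# the line passed to feedback_fn looks like
#   - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
# while the simpler form without error_codes reads
#   - ERROR: node node1.example.com: unable to check volume groups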
1360 def _ErrorIf(self, cond, *args, **kwargs):
1361 """Log an error message if the passed condition is True.
1364 cond = bool(cond) or self.op.debug_simulate_errors
1366 self._Error(*args, **kwargs)
1367 # only mark the operation as failed for ERROR cases, not for warnings
1368 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1369 self.bad = self.bad or cond
1371 def _VerifyNode(self, ninfo, nresult):
1372 """Run multiple tests against a node.
1376 - compares ganeti version
1377 - checks vg existence and size > 20G
1378 - checks config file checksum
1379 - checks ssh to other nodes
1381 @type ninfo: L{objects.Node}
1382 @param ninfo: the node to check
1383 @param nresult: the results from the node
1385 @return: whether overall this call was successful (and we can expect
1386 reasonable values in the response)
1390 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1392 # main result, nresult should be a non-empty dict
1393 test = not nresult or not isinstance(nresult, dict)
1394 _ErrorIf(test, self.ENODERPC, node,
1395 "unable to verify node: no data returned")
1399 # compares ganeti version
1400 local_version = constants.PROTOCOL_VERSION
1401 remote_version = nresult.get("version", None)
1402 test = not (remote_version and
1403 isinstance(remote_version, (list, tuple)) and
1404 len(remote_version) == 2)
1405 _ErrorIf(test, self.ENODERPC, node,
1406 "connection to node returned invalid data")
1410 test = local_version != remote_version[0]
1411 _ErrorIf(test, self.ENODEVERSION, node,
1412 "incompatible protocol versions: master %s,"
1413 " node %s", local_version, remote_version[0])
1417 # node seems compatible, we can actually try to look into its results
1419 # full package version
1420 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1421 self.ENODEVERSION, node,
1422 "software version mismatch: master %s, node %s",
1423 constants.RELEASE_VERSION, remote_version[1],
1424 code=self.ETYPE_WARNING)
1426 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1427 if isinstance(hyp_result, dict):
1428 for hv_name, hv_result in hyp_result.iteritems():
1429 test = hv_result is not None
1430 _ErrorIf(test, self.ENODEHV, node,
1431 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1434 test = nresult.get(constants.NV_NODESETUP,
1435 ["Missing NODESETUP results"])
1436 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1441 def _VerifyNodeTime(self, ninfo, nresult,
1442 nvinfo_starttime, nvinfo_endtime):
1443 """Check the node time.
1445 @type ninfo: L{objects.Node}
1446 @param ninfo: the node to check
1447 @param nresult: the remote results for the node
1448 @param nvinfo_starttime: the start time of the RPC call
1449 @param nvinfo_endtime: the end time of the RPC call
1453 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1455 ntime = nresult.get(constants.NV_TIME, None)
1457 ntime_merged = utils.MergeTime(ntime)
1458 except (ValueError, TypeError):
1459 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1462 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1463 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1464 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1465 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1469 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1470 "Node time diverges by at least %s from master node time",
1473 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1474 """Check the node time.
1476 @type ninfo: L{objects.Node}
1477 @param ninfo: the node to check
1478 @param nresult: the remote results for the node
1479 @param vg_name: the configured VG name
1486 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1488 # checks vg existence and size > 20G
1489 vglist = nresult.get(constants.NV_VGLIST, None)
1491 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1493 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1494 constants.MIN_VG_SIZE)
1495 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1498 pvlist = nresult.get(constants.NV_PVLIST, None)
1499 test = pvlist is None
1500 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1502 # check that ':' is not present in PV names, since it's a
1503 # special character for lvcreate (denotes the range of PEs to
1505 for _, pvname, owner_vg in pvlist:
1506 test = ":" in pvname
1507 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1508 " '%s' of VG '%s'", pvname, owner_vg)
1510 def _VerifyNodeNetwork(self, ninfo, nresult):
1511 """Check the node time.
1513 @type ninfo: L{objects.Node}
1514 @param ninfo: the node to check
1515 @param nresult: the remote results for the node
1519 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1521 test = constants.NV_NODELIST not in nresult
1522 _ErrorIf(test, self.ENODESSH, node,
1523 "node hasn't returned node ssh connectivity data")
1525 if nresult[constants.NV_NODELIST]:
1526 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1527 _ErrorIf(True, self.ENODESSH, node,
1528 "ssh communication with node '%s': %s", a_node, a_msg)
1530 test = constants.NV_NODENETTEST not in nresult
1531 _ErrorIf(test, self.ENODENET, node,
1532 "node hasn't returned node tcp connectivity data")
1534 if nresult[constants.NV_NODENETTEST]:
1535 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1537 _ErrorIf(True, self.ENODENET, node,
1538 "tcp communication with node '%s': %s",
1539 anode, nresult[constants.NV_NODENETTEST][anode])
1541 test = constants.NV_MASTERIP not in nresult
1542 _ErrorIf(test, self.ENODENET, node,
1543 "node hasn't returned node master IP reachability data")
1545 if not nresult[constants.NV_MASTERIP]:
1546 if node == self.master_node:
1547 msg = "the master node cannot reach the master IP (not configured?)"
1549 msg = "cannot reach the master IP"
1550 _ErrorIf(True, self.ENODENET, node, msg)
1553 def _VerifyInstance(self, instance, instanceconfig, node_image):
1554 """Verify an instance.
1556 This function checks to see if the required block devices are
1557 available on the instance's node.
1560 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1561 node_current = instanceconfig.primary_node
1563 node_vol_should = {}
1564 instanceconfig.MapLVsByNode(node_vol_should)
1566 for node in node_vol_should:
1567 n_img = node_image[node]
1568 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1569 # ignore missing volumes on offline or broken nodes
1571 for volume in node_vol_should[node]:
1572 test = volume not in n_img.volumes
1573 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1574 "volume %s missing on node %s", volume, node)
1576 if instanceconfig.admin_up:
1577 pri_img = node_image[node_current]
1578 test = instance not in pri_img.instances and not pri_img.offline
1579 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1580 "instance not running on its primary node %s",
1583 for node, n_img in node_image.items():
1584 if node != node_current:
1585 test = instance in n_img.instances
1586 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1587 "instance should not run on node %s", node)
1589 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1590 """Verify if there are any unknown volumes in the cluster.
1592 The .os, .swap and backup volumes are ignored. All other volumes are
1593 reported as unknown.
1596 for node, n_img in node_image.items():
1597 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598 # skip non-healthy nodes
1600 for volume in n_img.volumes:
1601 test = (node not in node_vol_should or
1602 volume not in node_vol_should[node])
1603 self._ErrorIf(test, self.ENODEORPHANLV, node,
1604 "volume %s is unknown", volume)
1606 def _VerifyOrphanInstances(self, instancelist, node_image):
1607 """Verify the list of running instances.
1609 This checks what instances are running but unknown to the cluster.
1612 for node, n_img in node_image.items():
1613 for o_inst in n_img.instances:
1614 test = o_inst not in instancelist
1615 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1616 "instance %s on node %s should not exist", o_inst, node)
1618 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1619 """Verify N+1 Memory Resilience.
1621 Check that if one single node dies we can still start all the
1622 instances it was primary for.
1625 for node, n_img in node_image.items():
1626 # This code checks that every node which is now listed as
1627 # secondary has enough memory to host all instances it is
1628 # supposed to, should a single other node in the cluster fail.
1629 # FIXME: not ready for failover to an arbitrary node
1630 # FIXME: does not support file-backed instances
1631 # WARNING: we currently take into account down instances as well
1632 # as up ones, considering that even if they're down someone
1633 # might want to start them even in the event of a node failure.
1634 for prinode, instances in n_img.sbp.items():
1636 for instance in instances:
1637 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1638 if bep[constants.BE_AUTO_BALANCE]:
1639 needed_mem += bep[constants.BE_MEMORY]
1640 test = n_img.mfree < needed_mem
1641 self._ErrorIf(test, self.ENODEN1, node,
1642 "not enough memory on to accommodate"
1643 " failovers should peer node %s fail", prinode)
1645 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1647 """Verifies and computes the node required file checksums.
1649 @type ninfo: L{objects.Node}
1650 @param ninfo: the node to check
1651 @param nresult: the remote results for the node
1652 @param file_list: required list of files
1653 @param local_cksum: dictionary of local files and their checksums
1654 @param master_files: list of files that only masters should have
1658 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1660 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1661 test = not isinstance(remote_cksum, dict)
1662 _ErrorIf(test, self.ENODEFILECHECK, node,
1663 "node hasn't returned file checksum data")
1667 for file_name in file_list:
1668 node_is_mc = ninfo.master_candidate
1669 must_have = (file_name not in master_files) or node_is_mc
1671 test1 = file_name not in remote_cksum
1673 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1675 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1676 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1677 "file '%s' missing", file_name)
1678 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1679 "file '%s' has wrong checksum", file_name)
1680 # not candidate and this is not a must-have file
1681 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1682 "file '%s' should not exist on non master"
1683 " candidates (and the file is outdated)", file_name)
1684 # all good, except non-master/non-must have combination
1685 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1686 "file '%s' should not exist"
1687 " on non master candidates", file_name)
1689 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1690 """Verifies and the node DRBD status.
1692 @type ninfo: L{objects.Node}
1693 @param ninfo: the node to check
1694 @param nresult: the remote results for the node
1695 @param instanceinfo: the dict of instances
1696 @param drbd_map: the DRBD map as returned by
1697 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1701 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1703 # compute the DRBD minors
1705 for minor, instance in drbd_map[node].items():
1706 test = instance not in instanceinfo
1707 _ErrorIf(test, self.ECLUSTERCFG, None,
1708 "ghost instance '%s' in temporary DRBD map", instance)
1709 # ghost instance should not be running, but otherwise we
1710 # don't give double warnings (both ghost instance and
1711 # unallocated minor in use)
1713 node_drbd[minor] = (instance, False)
1715 instance = instanceinfo[instance]
1716 node_drbd[minor] = (instance.name, instance.admin_up)
1718 # and now check them
1719 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1720 test = not isinstance(used_minors, (tuple, list))
1721 _ErrorIf(test, self.ENODEDRBD, node,
1722 "cannot parse drbd status file: %s", str(used_minors))
1724 # we cannot check drbd status
1727 for minor, (iname, must_exist) in node_drbd.items():
1728 test = minor not in used_minors and must_exist
1729 _ErrorIf(test, self.ENODEDRBD, node,
1730 "drbd minor %d of instance %s is not active", minor, iname)
1731 for minor in used_minors:
1732 test = minor not in node_drbd
1733 _ErrorIf(test, self.ENODEDRBD, node,
1734 "unallocated drbd minor %d is in use", minor)
1736 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1737 """Builds the node OS structures.
1739 @type ninfo: L{objects.Node}
1740 @param ninfo: the node to check
1741 @param nresult: the remote results for the node
1742 @param nimg: the node image object
1746 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1748 remote_os = nresult.get(constants.NV_OSLIST, None)
1749 test = (not isinstance(remote_os, list) or
1750 not compat.all(isinstance(v, list) and len(v) == 7
1751 for v in remote_os))
1753 _ErrorIf(test, self.ENODEOS, node,
1754 "node hasn't returned valid OS data")
1763 for (name, os_path, status, diagnose,
1764 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1766 if name not in os_dict:
1769 # parameters is a list of lists instead of list of tuples due to
1770 # JSON lacking a real tuple type, fix it:
1771 parameters = [tuple(v) for v in parameters]
1772 os_dict[name].append((os_path, status, diagnose,
1773 set(variants), set(parameters), set(api_ver)))
1775 nimg.oslist = os_dict
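# Illustrative example (hypothetical data): after this call nimg.oslist maps
# each OS name to a list of per-path 6-tuples, e.g.
#   {"debian-image": [("/srv/ganeti/os/debian-image", True, "",
#                      set(["default"]), set(), set([10, 15]))]}
# which _VerifyNodeOS below compares against the reference node.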
1777 def _VerifyNodeOS(self, ninfo, nimg, base):
1778 """Verifies the node OS list.
1780 @type ninfo: L{objects.Node}
1781 @param ninfo: the node to check
1782 @param nimg: the node image object
1783 @param base: the 'template' node we match against (e.g. from the master)
1787 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1789 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1791 for os_name, os_data in nimg.oslist.items():
1792 assert os_data, "Empty OS status for OS %s?!" % os_name
1793 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1794 _ErrorIf(not f_status, self.ENODEOS, node,
1795 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1796 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1797 "OS '%s' has multiple entries (first one shadows the rest): %s",
1798 os_name, utils.CommaJoin([v[0] for v in os_data]))
1799 # this will be caught in the backend too
1800 _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1801 and not f_var, self.ENODEOS, node,
1802 "OS %s with API at least %d does not declare any variant",
1803 os_name, constants.OS_API_V15)
1804 # comparisons with the 'base' image
1805 test = os_name not in base.oslist
1806 _ErrorIf(test, self.ENODEOS, node,
1807 "Extra OS %s not present on reference node (%s)",
1811 assert base.oslist[os_name], "Base node has empty OS status?"
1812 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1814 # base OS is invalid, skipping
1816 for kind, a, b in [("API version", f_api, b_api),
1817 ("variants list", f_var, b_var),
1818 ("parameters", f_param, b_param)]:
1819 _ErrorIf(a != b, self.ENODEOS, node,
1820 "OS %s %s differs from reference node %s: %s vs. %s",
1821 kind, os_name, base.name,
1822 utils.CommaJoin(a), utils.CommaJoin(b))
1824 # check any missing OSes
1825 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1826 _ErrorIf(missing, self.ENODEOS, node,
1827 "OSes present on reference node %s but missing on this node: %s",
1828 base.name, utils.CommaJoin(missing))
1830 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1831 """Verifies and updates the node volume data.
1833 This function will update a L{NodeImage}'s internal structures
1834 with data from the remote call.
1836 @type ninfo: L{objects.Node}
1837 @param ninfo: the node to check
1838 @param nresult: the remote results for the node
1839 @param nimg: the node image object
1840 @param vg_name: the configured VG name
1844 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1846 nimg.lvm_fail = True
1847 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1850 elif isinstance(lvdata, basestring):
1851 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1852 utils.SafeEncode(lvdata))
1853 elif not isinstance(lvdata, dict):
1854 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1856 nimg.volumes = lvdata
1857 nimg.lvm_fail = False
1859 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1860 """Verifies and updates the node instance list.
1862 If the listing was successful, then updates this node's instance
1863 list. Otherwise, it marks the RPC call as failed for the instance
1866 @type ninfo: L{objects.Node}
1867 @param ninfo: the node to check
1868 @param nresult: the remote results for the node
1869 @param nimg: the node image object
1872 idata = nresult.get(constants.NV_INSTANCELIST, None)
1873 test = not isinstance(idata, list)
1874 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1875 " (instancelist): %s", utils.SafeEncode(str(idata)))
1877 nimg.hyp_fail = True
1879 nimg.instances = idata
1881 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1882 """Verifies and computes a node information map
1884 @type ninfo: L{objects.Node}
1885 @param ninfo: the node to check
1886 @param nresult: the remote results for the node
1887 @param nimg: the node image object
1888 @param vg_name: the configured VG name
1892 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1894 # try to read free memory (from the hypervisor)
1895 hv_info = nresult.get(constants.NV_HVINFO, None)
1896 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1897 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1900 nimg.mfree = int(hv_info["memory_free"])
1901 except (ValueError, TypeError):
1902 _ErrorIf(True, self.ENODERPC, node,
1903 "node returned invalid nodeinfo, check hypervisor")
1905 # FIXME: devise a free space model for file based instances as well
1906 if vg_name is not None:
1907 test = (constants.NV_VGLIST not in nresult or
1908 vg_name not in nresult[constants.NV_VGLIST])
1909 _ErrorIf(test, self.ENODELVM, node,
1910 "node didn't return data for the volume group '%s'"
1911 " - it is either missing or broken", vg_name)
1914 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1915 except (ValueError, TypeError):
1916 _ErrorIf(True, self.ENODERPC, node,
1917 "node returned invalid LVM info, check LVM status")
1919 def BuildHooksEnv(self):
1922 Cluster-Verify hooks run only in the post phase; their failure is
1923 logged in the verify output and makes the verification fail.
1926 all_nodes = self.cfg.GetNodeList()
1928 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1930 for node in self.cfg.GetAllNodesInfo().values():
1931 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1933 return env, [], all_nodes
1935 def Exec(self, feedback_fn):
1936 """Verify integrity of cluster, performing various test on nodes.
1940 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1941 verbose = self.op.verbose
1942 self._feedback_fn = feedback_fn
1943 feedback_fn("* Verifying global settings")
1944 for msg in self.cfg.VerifyConfig():
1945 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1947 # Check the cluster certificates
1948 for cert_filename in constants.ALL_CERT_FILES:
1949 (errcode, msg) = _VerifyCertificate(cert_filename)
1950 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1952 vg_name = self.cfg.GetVGName()
1953 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1954 cluster = self.cfg.GetClusterInfo()
1955 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1956 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1957 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1958 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1959 for iname in instancelist)
1960 i_non_redundant = [] # Non redundant instances
1961 i_non_a_balanced = [] # Non auto-balanced instances
1962 n_offline = 0 # Count of offline nodes
1963 n_drained = 0 # Count of nodes being drained
1964 node_vol_should = {}
1966 # FIXME: verify OS list
1967 # do local checksums
1968 master_files = [constants.CLUSTER_CONF_FILE]
1969 master_node = self.master_node = self.cfg.GetMasterNode()
1970 master_ip = self.cfg.GetMasterIP()
1972 file_names = ssconf.SimpleStore().GetFileList()
1973 file_names.extend(constants.ALL_CERT_FILES)
1974 file_names.extend(master_files)
1975 if cluster.modify_etc_hosts:
1976 file_names.append(constants.ETC_HOSTS)
1978 local_checksums = utils.FingerprintFiles(file_names)
1980 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1981 node_verify_param = {
1982 constants.NV_FILELIST: file_names,
1983 constants.NV_NODELIST: [node.name for node in nodeinfo
1984 if not node.offline],
1985 constants.NV_HYPERVISOR: hypervisors,
1986 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1987 node.secondary_ip) for node in nodeinfo
1988 if not node.offline],
1989 constants.NV_INSTANCELIST: hypervisors,
1990 constants.NV_VERSION: None,
1991 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1992 constants.NV_NODESETUP: None,
1993 constants.NV_TIME: None,
1994 constants.NV_MASTERIP: (master_node, master_ip),
1995 constants.NV_OSLIST: None,
1998 if vg_name is not None:
1999 node_verify_param[constants.NV_VGLIST] = None
2000 node_verify_param[constants.NV_LVLIST] = vg_name
2001 node_verify_param[constants.NV_PVLIST] = [vg_name]
2002 node_verify_param[constants.NV_DRBDLIST] = None
2004 # Build our expected cluster state
2005 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2007 for node in nodeinfo)
2009 for instance in instancelist:
2010 inst_config = instanceinfo[instance]
2012 for nname in inst_config.all_nodes:
2013 if nname not in node_image:
2015 gnode = self.NodeImage(name=nname)
2017 node_image[nname] = gnode
2019 inst_config.MapLVsByNode(node_vol_should)
2021 pnode = inst_config.primary_node
2022 node_image[pnode].pinst.append(instance)
2024 for snode in inst_config.secondary_nodes:
2025 nimg = node_image[snode]
2026 nimg.sinst.append(instance)
2027 if pnode not in nimg.sbp:
2028 nimg.sbp[pnode] = []
2029 nimg.sbp[pnode].append(instance)
2031 # At this point, we have the in-memory data structures complete,
2032 # except for the runtime information, which we'll gather next
2034 # Due to the way our RPC system works, exact response times cannot be
2035 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2036 # time before and after executing the request, we can at least have a time window.
2038 nvinfo_starttime = time.time()
2039 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2040 self.cfg.GetClusterName())
2041 nvinfo_endtime = time.time()
2043 all_drbd_map = self.cfg.ComputeDRBDMap()
2045 feedback_fn("* Verifying node status")
2049 for node_i in nodeinfo:
2051 nimg = node_image[node]
2055 feedback_fn("* Skipping offline node %s" % (node,))
2059 if node == master_node:
2061 elif node_i.master_candidate:
2062 ntype = "master candidate"
2063 elif node_i.drained:
2069 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2071 msg = all_nvinfo[node].fail_msg
2072 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2074 nimg.rpc_fail = True
2077 nresult = all_nvinfo[node].payload
2079 nimg.call_ok = self._VerifyNode(node_i, nresult)
2080 self._VerifyNodeNetwork(node_i, nresult)
2081 self._VerifyNodeLVM(node_i, nresult, vg_name)
2082 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2084 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2085 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2087 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2088 self._UpdateNodeInstances(node_i, nresult, nimg)
2089 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2090 self._UpdateNodeOS(node_i, nresult, nimg)
2091 if not nimg.os_fail:
2092 if refos_img is None:
2094 self._VerifyNodeOS(node_i, nimg, refos_img)
2096 feedback_fn("* Verifying instance status")
2097 for instance in instancelist:
2099 feedback_fn("* Verifying instance %s" % instance)
2100 inst_config = instanceinfo[instance]
2101 self._VerifyInstance(instance, inst_config, node_image)
2102 inst_nodes_offline = []
2104 pnode = inst_config.primary_node
2105 pnode_img = node_image[pnode]
2106 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2107 self.ENODERPC, pnode, "instance %s, connection to"
2108 " primary node failed", instance)
2110 if pnode_img.offline:
2111 inst_nodes_offline.append(pnode)
2113 # If the instance is non-redundant we cannot survive losing its primary
2114 # node, so we are not N+1 compliant. On the other hand we have no disk
2115 # templates with more than one secondary, so that situation is not well supported either.
2117 # FIXME: does not support file-backed instances
2118 if not inst_config.secondary_nodes:
2119 i_non_redundant.append(instance)
2120 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2121 instance, "instance has multiple secondary nodes: %s",
2122 utils.CommaJoin(inst_config.secondary_nodes),
2123 code=self.ETYPE_WARNING)
2125 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2126 i_non_a_balanced.append(instance)
2128 for snode in inst_config.secondary_nodes:
2129 s_img = node_image[snode]
2130 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2131 "instance %s, connection to secondary node failed", instance)
2134 inst_nodes_offline.append(snode)
2136 # warn that the instance lives on offline nodes
2137 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2138 "instance lives on offline node(s) %s",
2139 utils.CommaJoin(inst_nodes_offline))
2140 # ... or ghost nodes
2141 for node in inst_config.all_nodes:
2142 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2143 "instance lives on ghost node %s", node)
2145 feedback_fn("* Verifying orphan volumes")
2146 self._VerifyOrphanVolumes(node_vol_should, node_image)
2148 feedback_fn("* Verifying orphan instances")
2149 self._VerifyOrphanInstances(instancelist, node_image)
2151 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2152 feedback_fn("* Verifying N+1 Memory redundancy")
2153 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2155 feedback_fn("* Other Notes")
2157 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2158 % len(i_non_redundant))
2160 if i_non_a_balanced:
2161 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2162 % len(i_non_a_balanced))
2165 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2168 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2172 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2173 """Analyze the post-hooks' result
2175 This method analyses the hook result, handles it, and sends some
2176 nicely-formatted feedback back to the user.
2178 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2179 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2180 @param hooks_results: the results of the multi-node hooks rpc call
2181 @param feedback_fn: function used to send feedback back to the caller
2182 @param lu_result: previous Exec result
2183 @return: the new Exec result, based on the previous result
2187 # We only really run POST phase hooks, and are only interested in their results
2189 if phase == constants.HOOKS_PHASE_POST:
2190 # Used to change hooks' output to proper indentation
2191 indent_re = re.compile('^', re.M)
2192 feedback_fn("* Hooks Results")
2193 assert hooks_results, "invalid result from hooks"
2195 for node_name in hooks_results:
2196 res = hooks_results[node_name]
2198 test = msg and not res.offline
2199 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2200 "Communication failure in hooks execution: %s", msg)
2201 if res.offline or msg:
2202 # No need to investigate payload if node is offline or gave an error.
2203 # manually override lu_result here, as _ErrorIf only
2204 # overrides self.bad
2207 for script, hkr, output in res.payload:
2208 test = hkr == constants.HKR_FAIL
2209 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2210 "Script %s failed, output:", script)
2212 output = indent_re.sub(' ', output)
2213 feedback_fn("%s" % output)
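# Illustrative sketch only (not called by any LU): the re-indentation trick
# used by HooksCallBack above - compiling "^" with re.M anchors the
# substitution at the start of every line, so a single sub() call prefixes
# the whole multi-line hook output before it is passed to feedback_fn.
def _ExampleIndentHookOutput(output, prefix="  "):
  """Return OUTPUT with every line prefixed by PREFIX."""
  import re  # the module already imports re; repeated so the sketch stands alone
  indent_re = re.compile("^", re.M)
  return indent_re.sub(prefix, output)

# e.g. _ExampleIndentHookOutput("ok\nfailed") == "  ok\n  failed"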
2219 class LUVerifyDisks(NoHooksLU):
2220 """Verifies the cluster disks status.
2226 def ExpandNames(self):
2227 self.needed_locks = {
2228 locking.LEVEL_NODE: locking.ALL_SET,
2229 locking.LEVEL_INSTANCE: locking.ALL_SET,
2231 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2233 def Exec(self, feedback_fn):
2234 """Verify integrity of cluster disks.
2236 @rtype: tuple of three items
2237 @return: a tuple of (dict of node-to-node_error, list of instances
2238 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2242 result = res_nodes, res_instances, res_missing = {}, [], {}
2244 vg_name = self.cfg.GetVGName()
2245 nodes = utils.NiceSort(self.cfg.GetNodeList())
2246 instances = [self.cfg.GetInstanceInfo(name)
2247 for name in self.cfg.GetInstanceList()]
2250 for inst in instances:
2252 if (not inst.admin_up or
2253 inst.disk_template not in constants.DTS_NET_MIRROR):
2255 inst.MapLVsByNode(inst_lvs)
2256 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2257 for node, vol_list in inst_lvs.iteritems():
2258 for vol in vol_list:
2259 nv_dict[(node, vol)] = inst
2264 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2268 node_res = node_lvs[node]
2269 if node_res.offline:
2271 msg = node_res.fail_msg
2273 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2274 res_nodes[node] = msg
2277 lvs = node_res.payload
2278 for lv_name, (_, _, lv_online) in lvs.items():
2279 inst = nv_dict.pop((node, lv_name), None)
2280 if (not lv_online and inst is not None
2281 and inst.name not in res_instances):
2282 res_instances.append(inst.name)
2284 # any leftover items in nv_dict are missing LVs, let's arrange the data better
2286 for key, inst in nv_dict.iteritems():
2287 if inst.name not in res_missing:
2288 res_missing[inst.name] = []
2289 res_missing[inst.name].append(key)
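# Illustrative sketch only: the map inversion described in the comment in
# LUVerifyDisks.Exec above, turning per-instance LV maps of the form
# {node: [vol, ...]} into a single {(node, vol): instance_name} lookup table
# (the nv_dict used to spot offline and missing LVs).
def _ExampleInvertLvMap(inst_lvs_by_name):
  """@param inst_lvs_by_name: dict of instance name -> {node: [vol, ...]}"""
  nv_dict = {}
  for iname, node_map in inst_lvs_by_name.items():
    for node, vol_list in node_map.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict

# e.g. _ExampleInvertLvMap({"inst1": {"node1": ["xenvg/disk0"]}})
#      == {("node1", "xenvg/disk0"): "inst1"}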
2294 class LURepairDiskSizes(NoHooksLU):
2295 """Verifies the cluster disks sizes.
2298 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2301 def ExpandNames(self):
2302 if self.op.instances:
2303 self.wanted_names = []
2304 for name in self.op.instances:
2305 full_name = _ExpandInstanceName(self.cfg, name)
2306 self.wanted_names.append(full_name)
2307 self.needed_locks = {
2308 locking.LEVEL_NODE: [],
2309 locking.LEVEL_INSTANCE: self.wanted_names,
2311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2313 self.wanted_names = None
2314 self.needed_locks = {
2315 locking.LEVEL_NODE: locking.ALL_SET,
2316 locking.LEVEL_INSTANCE: locking.ALL_SET,
2318 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2320 def DeclareLocks(self, level):
2321 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2322 self._LockInstancesNodes(primary_only=True)
2324 def CheckPrereq(self):
2325 """Check prerequisites.
2327 This only checks the optional instance list against the existing names.
2330 if self.wanted_names is None:
2331 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2333 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2334 in self.wanted_names]
2336 def _EnsureChildSizes(self, disk):
2337 """Ensure children of the disk have the needed disk size.
2339 This is valid mainly for DRBD8 and fixes an issue where the
2340 children have smaller disk size.
2342 @param disk: an L{ganeti.objects.Disk} object
2345 if disk.dev_type == constants.LD_DRBD8:
2346 assert disk.children, "Empty children for DRBD8?"
2347 fchild = disk.children[0]
2348 mismatch = fchild.size < disk.size
2350 self.LogInfo("Child disk has size %d, parent %d, fixing",
2351 fchild.size, disk.size)
2352 fchild.size = disk.size
2354 # and we recurse on this child only, not on the metadev
2355 return self._EnsureChildSizes(fchild) or mismatch
2359 def Exec(self, feedback_fn):
2360 """Verify the size of cluster disks.
2363 # TODO: check child disks too
2364 # TODO: check differences in size between primary/secondary nodes
2366 for instance in self.wanted_instances:
2367 pnode = instance.primary_node
2368 if pnode not in per_node_disks:
2369 per_node_disks[pnode] = []
2370 for idx, disk in enumerate(instance.disks):
2371 per_node_disks[pnode].append((instance, idx, disk))
2374 for node, dskl in per_node_disks.items():
2375 newl = [v[2].Copy() for v in dskl]
2377 self.cfg.SetDiskID(dsk, node)
2378 result = self.rpc.call_blockdev_getsizes(node, newl)
2380 self.LogWarning("Failure in blockdev_getsizes call to node"
2381 " %s, ignoring", node)
2383 if len(result.data) != len(dskl):
2384 self.LogWarning("Invalid result from node %s, ignoring node results",
2387 for ((instance, idx, disk), size) in zip(dskl, result.data):
2389 self.LogWarning("Disk %d of instance %s did not return size"
2390 " information, ignoring", idx, instance.name)
2392 if not isinstance(size, (int, long)):
2393 self.LogWarning("Disk %d of instance %s did not return valid"
2394 " size information, ignoring", idx, instance.name)
2397 if size != disk.size:
2398 self.LogInfo("Disk %d of instance %s has mismatched size,"
2399 " correcting: recorded %d, actual %d", idx,
2400 instance.name, disk.size, size)
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, size))
2404 if self._EnsureChildSizes(disk):
2405 self.cfg.Update(instance, feedback_fn)
2406 changed.append((instance.name, idx, disk.size))
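# Illustrative sketch only, using a stand-in class instead of
# L{ganeti.objects.Disk}: the child-resize rule applied by _EnsureChildSizes
# above - the first child (the data device of a DRBD8 disk) is grown to the
# parent's size, recursing on that child only, and the return value says
# whether anything had to be fixed.
class _ExampleDisk(object):
  def __init__(self, size, children=None):
    self.size = size
    self.children = children or []

def _ExampleEnsureChildSize(disk):
  """Grow the first child to the parent's size; return True if changed."""
  if not disk.children:
    return False
  fchild = disk.children[0]
  mismatch = fchild.size < disk.size
  if mismatch:
    fchild.size = disk.size
  # recurse on the data child only, mirroring _EnsureChildSizes
  return _ExampleEnsureChildSize(fchild) or mismatch

# e.g. a 1024 MiB parent with a 1000 MiB child:
#   _ExampleEnsureChildSize(_ExampleDisk(1024, [_ExampleDisk(1000)])) -> True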
2410 class LURenameCluster(LogicalUnit):
2411 """Rename the cluster.
2414 HPATH = "cluster-rename"
2415 HTYPE = constants.HTYPE_CLUSTER
2416 _OP_REQP = [("name", _TNonEmptyString)]
2418 def BuildHooksEnv(self):
2423 "OP_TARGET": self.cfg.GetClusterName(),
2424 "NEW_NAME": self.op.name,
2426 mn = self.cfg.GetMasterNode()
2427 all_nodes = self.cfg.GetNodeList()
2428 return env, [mn], all_nodes
2430 def CheckPrereq(self):
2431 """Verify that the passed name is a valid one.
2434 hostname = utils.GetHostInfo(self.op.name)
2436 new_name = hostname.name
2437 self.ip = new_ip = hostname.ip
2438 old_name = self.cfg.GetClusterName()
2439 old_ip = self.cfg.GetMasterIP()
2440 if new_name == old_name and new_ip == old_ip:
2441 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2442 " cluster has changed",
2444 if new_ip != old_ip:
2445 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2446 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2447 " reachable on the network. Aborting." %
2448 new_ip, errors.ECODE_NOTUNIQUE)
2450 self.op.name = new_name
2452 def Exec(self, feedback_fn):
2453 """Rename the cluster.
2456 clustername = self.op.name
2459 # shutdown the master IP
2460 master = self.cfg.GetMasterNode()
2461 result = self.rpc.call_node_stop_master(master, False)
2462 result.Raise("Could not disable the master role")
2465 cluster = self.cfg.GetClusterInfo()
2466 cluster.cluster_name = clustername
2467 cluster.master_ip = ip
2468 self.cfg.Update(cluster, feedback_fn)
2470 # update the known hosts file
2471 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2472 node_list = self.cfg.GetNodeList()
2474 node_list.remove(master)
2477 result = self.rpc.call_upload_file(node_list,
2478 constants.SSH_KNOWN_HOSTS_FILE)
2479 for to_node, to_result in result.iteritems():
2480 msg = to_result.fail_msg
2482 msg = ("Copy of file %s to node %s failed: %s" %
2483 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2484 self.proc.LogWarning(msg)
2487 result = self.rpc.call_node_start_master(master, False, False)
2488 msg = result.fail_msg
2490 self.LogWarning("Could not re-enable the master role on"
2491 " the master, please restart manually: %s", msg)
2494 def _RecursiveCheckIfLVMBased(disk):
2495 """Check if the given disk or its children are lvm-based.
2497 @type disk: L{objects.Disk}
2498 @param disk: the disk to check
2500 @return: boolean indicating whether a LD_LV dev_type was found or not
2504 for chdisk in disk.children:
2505 if _RecursiveCheckIfLVMBased(chdisk):
2507 return disk.dev_type == constants.LD_LV
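# Illustrative usage sketch only, with stand-in objects instead of
# L{objects.Disk}: _RecursiveCheckIfLVMBased answers "is there an LD_LV
# device anywhere in this disk tree?", which LUSetClusterParams.CheckPrereq
# below uses to refuse disabling the volume group while LVM-backed instance
# disks still exist.
class _ExampleDev(object):
  def __init__(self, dev_type, children=()):
    self.dev_type = dev_type
    self.children = list(children)

def _ExampleHasLvmLeaf(disk, lv_type="lvm"):
  """Standalone copy of the recursion, with a plain string instead of LD_LV."""
  for chdisk in disk.children:
    if _ExampleHasLvmLeaf(chdisk, lv_type):
      return True
  return disk.dev_type == lv_type

# e.g. a DRBD-like parent whose data child is an LV:
#   _ExampleHasLvmLeaf(_ExampleDev("drbd8", [_ExampleDev("lvm")])) -> True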
2510 class LUSetClusterParams(LogicalUnit):
2511 """Change the parameters of the cluster.
2514 HPATH = "cluster-modify"
2515 HTYPE = constants.HTYPE_CLUSTER
2517 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2518 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2519 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2520 ("enabled_hypervisors",
2521 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2524 ("candidate_pool_size", None),
2527 ("remove_uids", None),
2533 def CheckArguments(self):
2537 if self.op.candidate_pool_size is not None:
2539 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2540 except (ValueError, TypeError), err:
2541 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2542 str(err), errors.ECODE_INVAL)
2543 if self.op.candidate_pool_size < 1:
2544 raise errors.OpPrereqError("At least one master candidate needed",
2547 _CheckBooleanOpField(self.op, "maintain_node_health")
2549 if self.op.uid_pool:
2550 uidpool.CheckUidPool(self.op.uid_pool)
2552 if self.op.add_uids:
2553 uidpool.CheckUidPool(self.op.add_uids)
2555 if self.op.remove_uids:
2556 uidpool.CheckUidPool(self.op.remove_uids)
2558 def ExpandNames(self):
2559 # FIXME: in the future maybe other cluster params won't require checking on
2560 # all nodes to be modified.
2561 self.needed_locks = {
2562 locking.LEVEL_NODE: locking.ALL_SET,
2564 self.share_locks[locking.LEVEL_NODE] = 1
2566 def BuildHooksEnv(self):
2571 "OP_TARGET": self.cfg.GetClusterName(),
2572 "NEW_VG_NAME": self.op.vg_name,
2574 mn = self.cfg.GetMasterNode()
2575 return env, [mn], [mn]
2577 def CheckPrereq(self):
2578 """Check prerequisites.
2580 This checks whether the given params don't conflict and
2581 if the given volume group is valid.
2584 if self.op.vg_name is not None and not self.op.vg_name:
2585 instances = self.cfg.GetAllInstancesInfo().values()
2586 for inst in instances:
2587 for disk in inst.disks:
2588 if _RecursiveCheckIfLVMBased(disk):
2589 raise errors.OpPrereqError("Cannot disable lvm storage while"
2590 " lvm-based instances exist",
2593 node_list = self.acquired_locks[locking.LEVEL_NODE]
2595 # if vg_name not None, checks given volume group on all nodes
2597 vglist = self.rpc.call_vg_list(node_list)
2598 for node in node_list:
2599 msg = vglist[node].fail_msg
2601 # ignoring down node
2602 self.LogWarning("Error while gathering data on node %s"
2603 " (ignoring node): %s", node, msg)
2605 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2607 constants.MIN_VG_SIZE)
2609 raise errors.OpPrereqError("Error on node '%s': %s" %
2610 (node, vgstatus), errors.ECODE_ENVIRON)
2612 self.cluster = cluster = self.cfg.GetClusterInfo()
2613 # validate params changes
2614 if self.op.beparams:
2615 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2616 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2618 if self.op.nicparams:
2619 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2620 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2621 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2624 # check all instances for consistency
2625 for instance in self.cfg.GetAllInstancesInfo().values():
2626 for nic_idx, nic in enumerate(instance.nics):
2627 params_copy = copy.deepcopy(nic.nicparams)
2628 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2630 # check parameter syntax
2632 objects.NIC.CheckParameterSyntax(params_filled)
2633 except errors.ConfigurationError, err:
2634 nic_errors.append("Instance %s, nic/%d: %s" %
2635 (instance.name, nic_idx, err))
2637 # if we're moving instances to routed, check that they have an ip
2638 target_mode = params_filled[constants.NIC_MODE]
2639 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2640 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2641 (instance.name, nic_idx))
2643 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2644 "\n".join(nic_errors))
2646 # hypervisor list/parameters
2647 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2648 if self.op.hvparams:
2649 for hv_name, hv_dict in self.op.hvparams.items():
2650 if hv_name not in self.new_hvparams:
2651 self.new_hvparams[hv_name] = hv_dict
2653 self.new_hvparams[hv_name].update(hv_dict)
2655 # os hypervisor parameters
2656 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2658 for os_name, hvs in self.op.os_hvp.items():
2659 if os_name not in self.new_os_hvp:
2660 self.new_os_hvp[os_name] = hvs
2662 for hv_name, hv_dict in hvs.items():
2663 if hv_name not in self.new_os_hvp[os_name]:
2664 self.new_os_hvp[os_name][hv_name] = hv_dict
2666 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2669 self.new_osp = objects.FillDict(cluster.osparams, {})
2670 if self.op.osparams:
2671 for os_name, osp in self.op.osparams.items():
2672 if os_name not in self.new_osp:
2673 self.new_osp[os_name] = {}
2675 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2678 if not self.new_osp[os_name]:
2679 # we removed all parameters
2680 del self.new_osp[os_name]
2682 # check the parameter validity (remote check)
2683 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2684 os_name, self.new_osp[os_name])
2686 # changes to the hypervisor list
2687 if self.op.enabled_hypervisors is not None:
2688 self.hv_list = self.op.enabled_hypervisors
2689 for hv in self.hv_list:
2690 # if the hypervisor doesn't already exist in the cluster
2691 # hvparams, we initialize it to empty, and then (in both
2692 # cases) we make sure to fill the defaults, as we might not
2693 # have a complete defaults list if the hypervisor wasn't enabled before
2695 if hv not in new_hvp:
2697 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2698 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2700 self.hv_list = cluster.enabled_hypervisors
2702 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2703 # either the enabled list has changed, or the parameters have, validate
2704 for hv_name, hv_params in self.new_hvparams.items():
2705 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2706 (self.op.enabled_hypervisors and
2707 hv_name in self.op.enabled_hypervisors)):
2708 # either this is a new hypervisor, or its parameters have changed
2709 hv_class = hypervisor.GetHypervisor(hv_name)
2710 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2711 hv_class.CheckParameterSyntax(hv_params)
2712 _CheckHVParams(self, node_list, hv_name, hv_params)
2715 # no need to check any newly-enabled hypervisors, since the
2716 # defaults have already been checked in the above code-block
2717 for os_name, os_hvp in self.new_os_hvp.items():
2718 for hv_name, hv_params in os_hvp.items():
2719 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2720 # we need to fill in the new os_hvp on top of the actual hv_p
2721 cluster_defaults = self.new_hvparams.get(hv_name, {})
2722 new_osp = objects.FillDict(cluster_defaults, hv_params)
2723 hv_class = hypervisor.GetHypervisor(hv_name)
2724 hv_class.CheckParameterSyntax(new_osp)
2725 _CheckHVParams(self, node_list, hv_name, new_osp)
2728 def Exec(self, feedback_fn):
2729 """Change the parameters of the cluster.
2732 if self.op.vg_name is not None:
2733 new_volume = self.op.vg_name
2736 if new_volume != self.cfg.GetVGName():
2737 self.cfg.SetVGName(new_volume)
2739 feedback_fn("Cluster LVM configuration already in desired"
2740 " state, not changing")
2741 if self.op.hvparams:
2742 self.cluster.hvparams = self.new_hvparams
2744 self.cluster.os_hvp = self.new_os_hvp
2745 if self.op.enabled_hypervisors is not None:
2746 self.cluster.hvparams = self.new_hvparams
2747 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2748 if self.op.beparams:
2749 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2750 if self.op.nicparams:
2751 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2752 if self.op.osparams:
2753 self.cluster.osparams = self.new_osp
2755 if self.op.candidate_pool_size is not None:
2756 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2757 # we need to update the pool size here, otherwise the save will fail
2758 _AdjustCandidatePool(self, [])
2760 if self.op.maintain_node_health is not None:
2761 self.cluster.maintain_node_health = self.op.maintain_node_health
2763 if self.op.add_uids is not None:
2764 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2766 if self.op.remove_uids is not None:
2767 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2769 if self.op.uid_pool is not None:
2770 self.cluster.uid_pool = self.op.uid_pool
2772 self.cfg.Update(self.cluster, feedback_fn)
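# Illustrative sketch only (not objects.FillDict itself): the layered
# parameter filling used throughout LUSetClusterParams above - a copy of the
# defaults dict is updated with the more specific overrides, e.g. cluster
# hvparams overlaid by per-OS hvparams, or NIC defaults overlaid by a NIC's
# own values.
def _ExampleFillDict(defaults, overrides):
  """Return a new dict: DEFAULTS copied, then updated from OVERRIDES."""
  filled = dict(defaults)
  filled.update(overrides)
  return filled

# e.g. _ExampleFillDict({"acpi": True, "pae": True}, {"acpi": False})
#      == {"acpi": False, "pae": True}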
2775 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2776 """Distribute additional files which are part of the cluster configuration.
2778 ConfigWriter takes care of distributing the config and ssconf files, but
2779 there are more files which should be distributed to all nodes. This function
2780 makes sure those are copied.
2782 @param lu: calling logical unit
2783 @param additional_nodes: list of nodes not in the config to distribute to
2786 # 1. Gather target nodes
2787 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2788 dist_nodes = lu.cfg.GetOnlineNodeList()
2789 if additional_nodes is not None:
2790 dist_nodes.extend(additional_nodes)
2791 if myself.name in dist_nodes:
2792 dist_nodes.remove(myself.name)
2794 # 2. Gather files to distribute
2795 dist_files = set([constants.ETC_HOSTS,
2796 constants.SSH_KNOWN_HOSTS_FILE,
2797 constants.RAPI_CERT_FILE,
2798 constants.RAPI_USERS_FILE,
2799 constants.CONFD_HMAC_KEY,
2800 constants.CLUSTER_DOMAIN_SECRET_FILE,
2803 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2804 for hv_name in enabled_hypervisors:
2805 hv_class = hypervisor.GetHypervisor(hv_name)
2806 dist_files.update(hv_class.GetAncillaryFiles())
2808 # 3. Perform the files upload
2809 for fname in dist_files:
2810 if os.path.exists(fname):
2811 result = lu.rpc.call_upload_file(dist_nodes, fname)
2812 for to_node, to_result in result.items():
2813 msg = to_result.fail_msg
2815 msg = ("Copy of file %s to node %s failed: %s" %
2816 (fname, to_node, msg))
2817 lu.proc.LogWarning(msg)
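# Illustrative sketch only (plain data, no RPC): the selection logic of
# _RedistributeAncillaryFiles above - the master drops itself from the
# distribution list, optional extra nodes are appended, and only files that
# actually exist on the local filesystem are offered for upload.
def _ExampleSelectDistribution(master_name, online_nodes, candidate_files,
                               additional_nodes=None):
  """Return (nodes_to_push_to, files_that_exist_locally)."""
  import os  # already imported at module level; repeated for standalone use
  dist_nodes = list(online_nodes)
  if additional_nodes:
    dist_nodes.extend(additional_nodes)
  if master_name in dist_nodes:
    dist_nodes.remove(master_name)
  existing = [fname for fname in candidate_files if os.path.exists(fname)]
  return dist_nodes, existing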
2820 class LURedistributeConfig(NoHooksLU):
2821 """Force the redistribution of cluster configuration.
2823 This is a very simple LU.
2829 def ExpandNames(self):
2830 self.needed_locks = {
2831 locking.LEVEL_NODE: locking.ALL_SET,
2833 self.share_locks[locking.LEVEL_NODE] = 1
2835 def Exec(self, feedback_fn):
2836 """Redistribute the configuration.
2839 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2840 _RedistributeAncillaryFiles(self)
2843 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2844 """Sleep and poll for an instance's disk to sync.
2847 if not instance.disks or disks is not None and not disks:
2850 disks = _ExpandCheckDisks(instance, disks)
2853 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2855 node = instance.primary_node
2858 lu.cfg.SetDiskID(dev, node)
2860 # TODO: Convert to utils.Retry
2863 degr_retries = 10 # in seconds, as we sleep 1 second each time
2867 cumul_degraded = False
2868 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2869 msg = rstats.fail_msg
2871 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2874 raise errors.RemoteError("Can't contact node %s for mirror data,"
2875 " aborting." % node)
2878 rstats = rstats.payload
2880 for i, mstat in enumerate(rstats):
2882 lu.LogWarning("Can't compute data for node %s/%s",
2883 node, disks[i].iv_name)
2886 cumul_degraded = (cumul_degraded or
2887 (mstat.is_degraded and mstat.sync_percent is None))
2888 if mstat.sync_percent is not None:
2890 if mstat.estimated_time is not None:
2891 rem_time = ("%s remaining (estimated)" %
2892 utils.FormatSeconds(mstat.estimated_time))
2893 max_time = mstat.estimated_time
2895 rem_time = "no time estimate"
2896 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2897 (disks[i].iv_name, mstat.sync_percent, rem_time))
2899 # if we're done but degraded, let's do a few small retries, to
2900 # make sure we see a stable and not transient situation; therefore
2901 # we force restart of the loop
2902 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2903 logging.info("Degraded disks found, %d retries left", degr_retries)
2911 time.sleep(min(60, max_time))
2914 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2915 return not cumul_degraded
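# Illustrative sketch only (no RPC; sleeping is delegated to a parameter so
# the loop can be exercised without waiting): the retry shape of _WaitForSync
# above - when the mirrors look finished but are still degraded, a limited
# number of short extra polls are made, otherwise the wait between polls is
# capped at 60 seconds.
def _ExampleWaitLoop(poll_fn, sleep_fn, degr_retries=10):
  """POLL_FN returns (done, degraded, est_time); returns True if clean."""
  while True:
    done, degraded, est_time = poll_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(min(60, est_time))

# e.g. states = iter([(False, True, 5), (True, True, 0), (True, False, 0)])
#      _ExampleWaitLoop(lambda: next(states), lambda _secs: None) -> True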
2918 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2919 """Check that mirrors are not degraded.
2921 The ldisk parameter, if True, will change the test from the
2922 is_degraded attribute (which represents overall non-ok status for
2923 the device(s)) to the ldisk (representing the local storage status).
2926 lu.cfg.SetDiskID(dev, node)
2930 if on_primary or dev.AssembleOnSecondary():
2931 rstats = lu.rpc.call_blockdev_find(node, dev)
2932 msg = rstats.fail_msg
2934 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2936 elif not rstats.payload:
2937 lu.LogWarning("Can't find disk on node %s", node)
2941 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2943 result = result and not rstats.payload.is_degraded
2946 for child in dev.children:
2947 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2952 class LUDiagnoseOS(NoHooksLU):
2953 """Logical unit for OS diagnose/query.
2957 ("output_fields", _TListOf(_TNonEmptyString)),
2958 ("names", _TListOf(_TNonEmptyString)),
2961 _FIELDS_STATIC = utils.FieldSet()
2962 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2963 "parameters", "api_versions")
2965 def CheckArguments(self):
2967 raise errors.OpPrereqError("Selective OS query not supported",
2970 _CheckOutputFields(static=self._FIELDS_STATIC,
2971 dynamic=self._FIELDS_DYNAMIC,
2972 selected=self.op.output_fields)
2974 def ExpandNames(self):
2975 # Lock all nodes, in shared mode
2976 # Temporary removal of locks, should be reverted later
2977 # TODO: reintroduce locks when they are lighter-weight
2978 self.needed_locks = {}
2979 #self.share_locks[locking.LEVEL_NODE] = 1
2980 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2983 def _DiagnoseByOS(rlist):
2984 """Remaps a per-node return list into an a per-os per-node dictionary
2986 @param rlist: a map with node names as keys and OS objects as values
2989 @return: a dictionary with osnames as keys and as value another
2990 map, with nodes as keys and tuples of (path, status, diagnose,
2991 variants, parameters, api_versions) as values, eg::
2993 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2994 (/srv/..., False, "invalid api")],
2995 "node2": [(/srv/..., True, "", [], [])]}
3000 # we build here the list of nodes that didn't fail the RPC (at RPC
3001 # level), so that nodes with a non-responding node daemon don't
3002 # make all OSes invalid
3003 good_nodes = [node_name for node_name in rlist
3004 if not rlist[node_name].fail_msg]
3005 for node_name, nr in rlist.items():
3006 if nr.fail_msg or not nr.payload:
3008 for (name, path, status, diagnose, variants,
3009 params, api_versions) in nr.payload:
3010 if name not in all_os:
3011 # build a list of nodes for this os containing empty lists
3012 # for each node in node_list
3014 for nname in good_nodes:
3015 all_os[name][nname] = []
3016 # convert params from [name, help] to (name, help)
3017 params = [tuple(v) for v in params]
3018 all_os[name][node_name].append((path, status, diagnose,
3019 variants, params, api_versions))
3022 def Exec(self, feedback_fn):
3023 """Compute the list of OSes.
3026 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3027 node_data = self.rpc.call_os_diagnose(valid_nodes)
3028 pol = self._DiagnoseByOS(node_data)
3031 for os_name, os_data in pol.items():
3034 (variants, params, api_versions) = null_state = (set(), set(), set())
3035 for idx, osl in enumerate(os_data.values()):
3036 valid = bool(valid and osl and osl[0][1])
3038 (variants, params, api_versions) = null_state
3040 node_variants, node_params, node_api = osl[0][3:6]
3041 if idx == 0: # first entry
3042 variants = set(node_variants)
3043 params = set(node_params)
3044 api_versions = set(node_api)
3045 else: # keep consistency
3046 variants.intersection_update(node_variants)
3047 params.intersection_update(node_params)
3048 api_versions.intersection_update(node_api)
3050 for field in self.op.output_fields:
3053 elif field == "valid":
3055 elif field == "node_status":
3056 # this is just a copy of the dict
3058 for node_name, nos_list in os_data.items():
3059 val[node_name] = nos_list
3060 elif field == "variants":
3061 val = list(variants)
3062 elif field == "parameters":
3064 elif field == "api_versions":
3065 val = list(api_versions)
3067 raise errors.ParameterError(field)
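# Illustrative sketch only: the consistency rule used when merging per-node
# OS data above - the first node seeds the set of variants (likewise
# parameters and API versions), every further node can only narrow it, so
# the reported value is what *all* nodes agree on.
def _ExampleCommonVariants(per_node_variants):
  """@param per_node_variants: list of per-node variant lists"""
  common = set()
  for idx, node_variants in enumerate(per_node_variants):
    if idx == 0:
      common = set(node_variants)
    else:
      common.intersection_update(node_variants)
  return common

# e.g. _ExampleCommonVariants([["lenny", "squeeze"], ["squeeze", "wheezy"]])
#      == set(["squeeze"])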
3074 class LURemoveNode(LogicalUnit):
3075 """Logical unit for removing a node.
3078 HPATH = "node-remove"
3079 HTYPE = constants.HTYPE_NODE
3080 _OP_REQP = [("node_name", _TNonEmptyString)]
3082 def BuildHooksEnv(self):
3085 This doesn't run on the target node in the pre phase as a failed
3086 node would then be impossible to remove.
3090 "OP_TARGET": self.op.node_name,
3091 "NODE_NAME": self.op.node_name,
3093 all_nodes = self.cfg.GetNodeList()
3095 all_nodes.remove(self.op.node_name)
3097 logging.warning("Node %s which is about to be removed not found"
3098 " in the all nodes list", self.op.node_name)
3099 return env, all_nodes, all_nodes
3101 def CheckPrereq(self):
3102 """Check prerequisites.
3105 - the node exists in the configuration
3106 - it does not have primary or secondary instances
3107 - it's not the master
3109 Any errors are signaled by raising errors.OpPrereqError.
3112 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3113 node = self.cfg.GetNodeInfo(self.op.node_name)
3114 assert node is not None
3116 instance_list = self.cfg.GetInstanceList()
3118 masternode = self.cfg.GetMasterNode()
3119 if node.name == masternode:
3120 raise errors.OpPrereqError("Node is the master node,"
3121 " you need to failover first.",
3124 for instance_name in instance_list:
3125 instance = self.cfg.GetInstanceInfo(instance_name)
3126 if node.name in instance.all_nodes:
3127 raise errors.OpPrereqError("Instance %s is still running on the node,"
3128 " please remove first." % instance_name,
3130 self.op.node_name = node.name
3133 def Exec(self, feedback_fn):
3134 """Removes the node from the cluster.
3138 logging.info("Stopping the node daemon and removing configs from node %s",
3141 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3143 # Promote nodes to master candidate as needed
3144 _AdjustCandidatePool(self, exceptions=[node.name])
3145 self.context.RemoveNode(node.name)
3147 # Run post hooks on the node before it's removed
3148 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3150 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3152 # pylint: disable-msg=W0702
3153 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3155 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3156 msg = result.fail_msg
3158 self.LogWarning("Errors encountered on the remote node while leaving"
3159 " the cluster: %s", msg)
3161 # Remove node from our /etc/hosts
3162 if self.cfg.GetClusterInfo().modify_etc_hosts:
3163 # FIXME: this should be done via an rpc call to node daemon
3164 utils.RemoveHostFromEtcHosts(node.name)
3165 _RedistributeAncillaryFiles(self)
3168 class LUQueryNodes(NoHooksLU):
3169 """Logical unit for querying nodes.
3172 # pylint: disable-msg=W0142
3174 ("output_fields", _TListOf(_TNonEmptyString)),
3175 ("names", _TListOf(_TNonEmptyString)),
3176 ("use_locking", _TBool),
3180 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3181 "master_candidate", "offline", "drained"]
3183 _FIELDS_DYNAMIC = utils.FieldSet(
3185 "mtotal", "mnode", "mfree",
3187 "ctotal", "cnodes", "csockets",
3190 _FIELDS_STATIC = utils.FieldSet(*[
3191 "pinst_cnt", "sinst_cnt",
3192 "pinst_list", "sinst_list",
3193 "pip", "sip", "tags",
3195 "role"] + _SIMPLE_FIELDS
3198 def CheckArguments(self):
3199 _CheckOutputFields(static=self._FIELDS_STATIC,
3200 dynamic=self._FIELDS_DYNAMIC,
3201 selected=self.op.output_fields)
3203 def ExpandNames(self):
3204 self.needed_locks = {}
3205 self.share_locks[locking.LEVEL_NODE] = 1
3208 self.wanted = _GetWantedNodes(self, self.op.names)
3210 self.wanted = locking.ALL_SET
3212 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3213 self.do_locking = self.do_node_query and self.op.use_locking
3215 # if we don't request only static fields, we need to lock the nodes
3216 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3218 def Exec(self, feedback_fn):
3219 """Computes the list of nodes and their attributes.
3222 all_info = self.cfg.GetAllNodesInfo()
3224 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3225 elif self.wanted != locking.ALL_SET:
3226 nodenames = self.wanted
3227 missing = set(nodenames).difference(all_info.keys())
3229 raise errors.OpExecError(
3230 "Some nodes were removed before retrieving their data: %s" % missing)
3232 nodenames = all_info.keys()
3234 nodenames = utils.NiceSort(nodenames)
3235 nodelist = [all_info[name] for name in nodenames]
3237 # begin data gathering
3239 if self.do_node_query:
3241 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3242 self.cfg.GetHypervisorType())
3243 for name in nodenames:
3244 nodeinfo = node_data[name]
3245 if not nodeinfo.fail_msg and nodeinfo.payload:
3246 nodeinfo = nodeinfo.payload
3247 fn = utils.TryConvert
3249 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3250 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3251 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3252 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3253 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3254 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3255 "bootid": nodeinfo.get('bootid', None),
3256 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3257 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3260 live_data[name] = {}
3262 live_data = dict.fromkeys(nodenames, {})
3264 node_to_primary = dict([(name, set()) for name in nodenames])
3265 node_to_secondary = dict([(name, set()) for name in nodenames])
3267 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3268 "sinst_cnt", "sinst_list"))
3269 if inst_fields & frozenset(self.op.output_fields):
3270 inst_data = self.cfg.GetAllInstancesInfo()
3272 for inst in inst_data.values():
3273 if inst.primary_node in node_to_primary:
3274 node_to_primary[inst.primary_node].add(inst.name)
3275 for secnode in inst.secondary_nodes:
3276 if secnode in node_to_secondary:
3277 node_to_secondary[secnode].add(inst.name)
3279 master_node = self.cfg.GetMasterNode()
3281 # end data gathering
3284 for node in nodelist:
3286 for field in self.op.output_fields:
3287 if field in self._SIMPLE_FIELDS:
3288 val = getattr(node, field)
3289 elif field == "pinst_list":
3290 val = list(node_to_primary[node.name])
3291 elif field == "sinst_list":
3292 val = list(node_to_secondary[node.name])
3293 elif field == "pinst_cnt":
3294 val = len(node_to_primary[node.name])
3295 elif field == "sinst_cnt":
3296 val = len(node_to_secondary[node.name])
3297 elif field == "pip":
3298 val = node.primary_ip
3299 elif field == "sip":
3300 val = node.secondary_ip
3301 elif field == "tags":
3302 val = list(node.GetTags())
3303 elif field == "master":
3304 val = node.name == master_node
3305 elif self._FIELDS_DYNAMIC.Matches(field):
3306 val = live_data[node.name].get(field, None)
3307 elif field == "role":
3308 if node.name == master_node:
3310 elif node.master_candidate:
3319 raise errors.ParameterError(field)
3320 node_output.append(val)
3321 output.append(node_output)
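# Illustrative sketch only (plain tuples instead of config objects): the
# reverse maps built by LUQueryNodes.Exec above, from node name to the sets
# of instances having it as primary respectively secondary node; they are
# only computed when the requested output fields actually need them.
def _ExampleNodeToInstances(node_names, instances):
  """@param instances: list of (name, primary_node, [secondary_nodes])"""
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for iname, pnode, snodes in instances:
    if pnode in node_to_primary:
      node_to_primary[pnode].add(iname)
    for snode in snodes:
      if snode in node_to_secondary:
        node_to_secondary[snode].add(iname)
  return node_to_primary, node_to_secondary

# e.g. _ExampleNodeToInstances(["n1", "n2"], [("i1", "n1", ["n2"])])
#      == ({"n1": set(["i1"]), "n2": set()},
#          {"n1": set(), "n2": set(["i1"])})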
3326 class LUQueryNodeVolumes(NoHooksLU):
3327 """Logical unit for getting volumes on node(s).
3331 ("nodes", _TListOf(_TNonEmptyString)),
3332 ("output_fields", _TListOf(_TNonEmptyString)),
3335 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3336 _FIELDS_STATIC = utils.FieldSet("node")
3338 def CheckArguments(self):
3339 _CheckOutputFields(static=self._FIELDS_STATIC,
3340 dynamic=self._FIELDS_DYNAMIC,
3341 selected=self.op.output_fields)
3343 def ExpandNames(self):
3344 self.needed_locks = {}
3345 self.share_locks[locking.LEVEL_NODE] = 1
3346 if not self.op.nodes:
3347 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3349 self.needed_locks[locking.LEVEL_NODE] = \
3350 _GetWantedNodes(self, self.op.nodes)
3352 def Exec(self, feedback_fn):
3353 """Computes the list of nodes and their attributes.
3356 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3357 volumes = self.rpc.call_node_volumes(nodenames)
3359 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3360 in self.cfg.GetInstanceList()]
3362 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3365 for node in nodenames:
3366 nresult = volumes[node]
3369 msg = nresult.fail_msg
3371 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3374 node_vols = nresult.payload[:]
3375 node_vols.sort(key=lambda vol: vol['dev'])
3377 for vol in node_vols:
3379 for field in self.op.output_fields:
3382 elif field == "phys":
3386 elif field == "name":
3388 elif field == "size":
3389 val = int(float(vol['size']))
3390 elif field == "instance":
3392 if node not in lv_by_node[inst]:
3394 if vol['name'] in lv_by_node[inst][node]:
3400 raise errors.ParameterError(field)
3401 node_output.append(str(val))
3403 output.append(node_output)
3408 class LUQueryNodeStorage(NoHooksLU):
3409 """Logical unit for getting information on storage units on node(s).
3412 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3414 ("nodes", _TListOf(_TNonEmptyString)),
3415 ("storage_type", _CheckStorageType),
3416 ("output_fields", _TListOf(_TNonEmptyString)),
3418 _OP_DEFS = [("name", None)]
3421 def CheckArguments(self):
3422 _CheckOutputFields(static=self._FIELDS_STATIC,
3423 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3424 selected=self.op.output_fields)
3426 def ExpandNames(self):
3427 self.needed_locks = {}
3428 self.share_locks[locking.LEVEL_NODE] = 1
3431 self.needed_locks[locking.LEVEL_NODE] = \
3432 _GetWantedNodes(self, self.op.nodes)
3434 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3436 def Exec(self, feedback_fn):
3437 """Computes the list of nodes and their attributes.
3440 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3442 # Always get name to sort by
3443 if constants.SF_NAME in self.op.output_fields:
3444 fields = self.op.output_fields[:]
3446 fields = [constants.SF_NAME] + self.op.output_fields
3448 # Never ask for node or type as it's only known to the LU
3449 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3450 while extra in fields:
3451 fields.remove(extra)
3453 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3454 name_idx = field_idx[constants.SF_NAME]
3456 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3457 data = self.rpc.call_storage_list(self.nodes,
3458 self.op.storage_type, st_args,
3459 self.op.name, fields)
3463 for node in utils.NiceSort(self.nodes):
3464 nresult = data[node]
3468 msg = nresult.fail_msg
3470 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3473 rows = dict([(row[name_idx], row) for row in nresult.payload])
3475 for name in utils.NiceSort(rows.keys()):
3480 for field in self.op.output_fields:
3481 if field == constants.SF_NODE:
3483 elif field == constants.SF_TYPE:
3484 val = self.op.storage_type
3485 elif field in field_idx:
3486 val = row[field_idx[field]]
3488 raise errors.ParameterError(field)
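# Illustrative sketch only (plain strings instead of the SF_* constants):
# the field preparation done by LUQueryNodeStorage.Exec above - the name
# field is always fetched so the rows can be sorted, the node and type
# fields are stripped from the remote request because only the LU can fill
# them in, and a name->index map is built for assembling the output rows.
def _ExamplePrepareStorageFields(output_fields, name_field="name",
                                 local_only=("node", "type")):
  if name_field in output_fields:
    fields = list(output_fields)
  else:
    fields = [name_field] + list(output_fields)
  for extra in local_only:
    while extra in fields:
      fields.remove(extra)
  field_idx = dict((fname, idx) for (idx, fname) in enumerate(fields))
  return fields, field_idx

# e.g. _ExamplePrepareStorageFields(["node", "free", "name"])
#      == (["free", "name"], {"free": 0, "name": 1})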
3497 class LUModifyNodeStorage(NoHooksLU):
3498 """Logical unit for modifying a storage volume on a node.
3502 ("node_name", _TNonEmptyString),
3503 ("storage_type", _CheckStorageType),
3504 ("name", _TNonEmptyString),
3505 ("changes", _TDict),
3509 def CheckArguments(self):
3510 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3512 storage_type = self.op.storage_type
3515 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3517 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3518 " modified" % storage_type,
3521 diff = set(self.op.changes.keys()) - modifiable
3523 raise errors.OpPrereqError("The following fields can not be modified for"
3524 " storage units of type '%s': %r" %
3525 (storage_type, list(diff)),
3528 def ExpandNames(self):
3529 self.needed_locks = {
3530 locking.LEVEL_NODE: self.op.node_name,
3533 def Exec(self, feedback_fn):
3534 """Computes the list of nodes and their attributes.
3537 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3538 result = self.rpc.call_storage_modify(self.op.node_name,
3539 self.op.storage_type, st_args,
3540 self.op.name, self.op.changes)
3541 result.Raise("Failed to modify storage unit '%s' on %s" %
3542 (self.op.name, self.op.node_name))
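# Illustrative sketch only: the set-difference check from CheckArguments
# above - any requested change whose key is not in the storage type's set of
# modifiable fields is rejected before the RPC is made.
def _ExampleUnmodifiableKeys(changes, modifiable):
  """Return the (possibly empty) set of keys that may not be changed."""
  return set(changes.keys()) - set(modifiable)

# e.g. _ExampleUnmodifiableKeys({"allocatable": False, "size": 10},
#                               ["allocatable"])
#      == set(["size"])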
3545 class LUAddNode(LogicalUnit):
3546 """Logical unit for adding node to the cluster.
3550 HTYPE = constants.HTYPE_NODE
3552 ("node_name", _TNonEmptyString),
3554 _OP_DEFS = [("secondary_ip", None)]
3556 def CheckArguments(self):
3557 # validate/normalize the node name
3558 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3560 def BuildHooksEnv(self):
3563 This will run on all nodes before, and on all nodes + the new node after.
3567 "OP_TARGET": self.op.node_name,
3568 "NODE_NAME": self.op.node_name,
3569 "NODE_PIP": self.op.primary_ip,
3570 "NODE_SIP": self.op.secondary_ip,
3572 nodes_0 = self.cfg.GetNodeList()
3573 nodes_1 = nodes_0 + [self.op.node_name, ]
3574 return env, nodes_0, nodes_1
3576 def CheckPrereq(self):
3577 """Check prerequisites.
3580 - the new node is not already in the config
3582 - its parameters (single/dual homed) match the cluster
3584 Any errors are signaled by raising errors.OpPrereqError.
3587 node_name = self.op.node_name
3590 dns_data = utils.GetHostInfo(node_name)
3592 node = dns_data.name
3593 primary_ip = self.op.primary_ip = dns_data.ip
3594 if self.op.secondary_ip is None:
3595 self.op.secondary_ip = primary_ip
3596 if not utils.IsValidIP(self.op.secondary_ip):
3597 raise errors.OpPrereqError("Invalid secondary IP given",
3599 secondary_ip = self.op.secondary_ip
3601 node_list = cfg.GetNodeList()
3602 if not self.op.readd and node in node_list:
3603 raise errors.OpPrereqError("Node %s is already in the configuration" %
3604 node, errors.ECODE_EXISTS)
3605 elif self.op.readd and node not in node_list:
3606 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3609 self.changed_primary_ip = False
3611 for existing_node_name in node_list:
3612 existing_node = cfg.GetNodeInfo(existing_node_name)
3614 if self.op.readd and node == existing_node_name:
3615 if existing_node.secondary_ip != secondary_ip:
3616 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3617 " address configuration as before",
3619 if existing_node.primary_ip != primary_ip:
3620 self.changed_primary_ip = True
3624 if (existing_node.primary_ip == primary_ip or
3625 existing_node.secondary_ip == primary_ip or
3626 existing_node.primary_ip == secondary_ip or
3627 existing_node.secondary_ip == secondary_ip):
3628 raise errors.OpPrereqError("New node ip address(es) conflict with"
3629 " existing node %s" % existing_node.name,
3630 errors.ECODE_NOTUNIQUE)
3632 # check that the type of the node (single versus dual homed) is the
3633 # same as for the master
3634 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3635 master_singlehomed = myself.secondary_ip == myself.primary_ip
3636 newbie_singlehomed = secondary_ip == primary_ip
3637 if master_singlehomed != newbie_singlehomed:
3638 if master_singlehomed:
3639 raise errors.OpPrereqError("The master has no private ip but the"
3640 " new node has one",
3643 raise errors.OpPrereqError("The master has a private ip but the"
3644 " new node doesn't have one",
3647 # checks reachability
3648 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3649 raise errors.OpPrereqError("Node not reachable by ping",
3650 errors.ECODE_ENVIRON)
3652 if not newbie_singlehomed:
3653 # check reachability from my secondary ip to newbie's secondary ip
3654 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3655 source=myself.secondary_ip):
3656 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3657 " based ping to noded port",
3658 errors.ECODE_ENVIRON)
3665 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3668 self.new_node = self.cfg.GetNodeInfo(node)
3669 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3671 self.new_node = objects.Node(name=node,
3672 primary_ip=primary_ip,
3673 secondary_ip=secondary_ip,
3674 master_candidate=self.master_candidate,
3675 offline=False, drained=False)
3677 def Exec(self, feedback_fn):
3678 """Adds the new node to the cluster.
3681 new_node = self.new_node
3682 node = new_node.name
3684 # for re-adds, reset the offline/drained/master-candidate flags;
3685 # we need to reset here, otherwise offline would prevent RPC calls
3686 # later in the procedure; this also means that if the re-add
3687 # fails, we are left with a non-offlined, broken node
3689 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3690 self.LogInfo("Readding a node, the offline/drained flags were reset")
3691 # if we demote the node, we do cleanup later in the procedure
3692 new_node.master_candidate = self.master_candidate
3693 if self.changed_primary_ip:
3694 new_node.primary_ip = self.op.primary_ip
3696 # notify the user about any possible mc promotion
3697 if new_node.master_candidate:
3698 self.LogInfo("Node will be a master candidate")
3700 # check connectivity
3701 result = self.rpc.call_version([node])[node]
3702 result.Raise("Can't get version information from node %s" % node)
3703 if constants.PROTOCOL_VERSION == result.payload:
3704 logging.info("Communication to node %s fine, sw version %s match",
3705 node, result.payload)
3707 raise errors.OpExecError("Version mismatch master version %s,"
3708 " node version %s" %
3709 (constants.PROTOCOL_VERSION, result.payload))
3712 if self.cfg.GetClusterInfo().modify_ssh_setup:
3713 logging.info("Copy ssh key to node %s", node)
3714 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3716 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3717 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3721 keyarray.append(utils.ReadFile(i))
3723 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3724 keyarray[2], keyarray[3], keyarray[4],
3726 result.Raise("Cannot transfer ssh keys to the new node")
3728 # Add node to our /etc/hosts, and add key to known_hosts
3729 if self.cfg.GetClusterInfo().modify_etc_hosts:
3730 # FIXME: this should be done via an rpc call to node daemon
3731 utils.AddHostToEtcHosts(new_node.name)
3733 if new_node.secondary_ip != new_node.primary_ip:
3734 result = self.rpc.call_node_has_ip_address(new_node.name,
3735 new_node.secondary_ip)
3736 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3737 prereq=True, ecode=errors.ECODE_ENVIRON)
3738 if not result.payload:
3739 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3740 " you gave (%s). Please fix and re-run this"
3741 " command." % new_node.secondary_ip)
3743 node_verify_list = [self.cfg.GetMasterNode()]
3744 node_verify_param = {
3745 constants.NV_NODELIST: [node],
3746 # TODO: do a node-net-test as well?
3749 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3750 self.cfg.GetClusterName())
3751 for verifier in node_verify_list:
3752 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3753 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3755 for failed in nl_payload:
3756 feedback_fn("ssh/hostname verification failed"
3757 " (checking from %s): %s" %
3758 (verifier, nl_payload[failed]))
3759 raise errors.OpExecError("ssh/hostname verification failed.")
3762 _RedistributeAncillaryFiles(self)
3763 self.context.ReaddNode(new_node)
3764 # make sure we redistribute the config
3765 self.cfg.Update(new_node, feedback_fn)
3766 # and make sure the new node will not have old files around
3767 if not new_node.master_candidate:
3768 result = self.rpc.call_node_demote_from_mc(new_node.name)
3769 msg = result.fail_msg
3771 self.LogWarning("Node failed to demote itself from master"
3772 " candidate status: %s" % msg)
3774 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3775 self.context.AddNode(new_node, self.proc.GetECId())
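# Illustrative sketch only (plain tuples instead of node objects): the two
# address checks made by LUAddNode.CheckPrereq above - the new node's primary
# and secondary IPs must not collide with any existing node, and its homing
# (secondary IP equal to the primary or not) must match the master's.
def _ExampleCheckNewNodeIps(new_primary, new_secondary, existing_nodes,
                            master_singlehomed):
  """@param existing_nodes: list of (primary_ip, secondary_ip) tuples

  @return: None if everything is fine, otherwise an error string

  """
  for prim, sec in existing_nodes:
    if new_primary in (prim, sec) or new_secondary in (prim, sec):
      return "new node ip address(es) conflict with an existing node"
  if (new_primary == new_secondary) != master_singlehomed:
    return "single/dual-homed setting differs from the master's"
  return None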
3778 class LUSetNodeParams(LogicalUnit):
3779 """Modifies the parameters of a node.
3782 HPATH = "node-modify"
3783 HTYPE = constants.HTYPE_NODE
3784 _OP_REQP = [("node_name", _TNonEmptyString)]
3787 def CheckArguments(self):
3788 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3789 _CheckBooleanOpField(self.op, 'master_candidate')
3790 _CheckBooleanOpField(self.op, 'offline')
3791 _CheckBooleanOpField(self.op, 'drained')
3792 _CheckBooleanOpField(self.op, 'auto_promote')
3793 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3794 if all_mods.count(None) == 3:
3795 raise errors.OpPrereqError("Please pass at least one modification",
3797 if all_mods.count(True) > 1:
3798 raise errors.OpPrereqError("Can't set the node into more than one"
3799 " state at the same time",
3802 # Boolean value that tells us whether we're offlining or draining the node
3803 self.offline_or_drain = (self.op.offline == True or
3804 self.op.drained == True)
3805 self.deoffline_or_drain = (self.op.offline == False or
3806 self.op.drained == False)
3807 self.might_demote = (self.op.master_candidate == False or
3808 self.offline_or_drain)
3810 self.lock_all = self.op.auto_promote and self.might_demote
3813 def ExpandNames(self):
3815 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3817 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3819 def BuildHooksEnv(self):
3822 This runs on the master node.
3826 "OP_TARGET": self.op.node_name,
3827 "MASTER_CANDIDATE": str(self.op.master_candidate),
3828 "OFFLINE": str(self.op.offline),
3829 "DRAINED": str(self.op.drained),
3831 nl = [self.cfg.GetMasterNode(),
3835 def CheckPrereq(self):
3836 """Check prerequisites.
3838 This only checks the instance list against the existing names.
3841 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3843 if (self.op.master_candidate is not None or
3844 self.op.drained is not None or
3845 self.op.offline is not None):
3846 # we can't change the master's node flags
3847 if self.op.node_name == self.cfg.GetMasterNode():
3848 raise errors.OpPrereqError("The master role can be changed"
3849 " only via masterfailover",
3853 if node.master_candidate and self.might_demote and not self.lock_all:
3854 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3855 # check if after removing the current node, we're missing master candidates
3857 (mc_remaining, mc_should, _) = \
3858 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3859 if mc_remaining < mc_should:
3860 raise errors.OpPrereqError("Not enough master candidates, please"
3861 " pass auto_promote to allow promotion",
3864 if (self.op.master_candidate == True and
3865 ((node.offline and not self.op.offline == False) or
3866 (node.drained and not self.op.drained == False))):
3867 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3868 " to master_candidate" % node.name,
3871 # If we're being deofflined/drained, we'll MC ourself if needed
3872 if (self.deoffline_or_drain and not self.offline_or_drain and not
3873 self.op.master_candidate == True and not node.master_candidate):
3874 self.op.master_candidate = _DecideSelfPromotion(self)
3875 if self.op.master_candidate:
3876 self.LogInfo("Autopromoting node to master candidate")
3880 def Exec(self, feedback_fn):
3889 if self.op.offline is not None:
3890 node.offline = self.op.offline
3891 result.append(("offline", str(self.op.offline)))
3892 if self.op.offline == True:
3893 if node.master_candidate:
3894 node.master_candidate = False
3896 result.append(("master_candidate", "auto-demotion due to offline"))
3898 node.drained = False
3899 result.append(("drained", "clear drained status due to offline"))
3901 if self.op.master_candidate is not None:
3902 node.master_candidate = self.op.master_candidate
3904 result.append(("master_candidate", str(self.op.master_candidate)))
3905 if self.op.master_candidate == False:
3906 rrc = self.rpc.call_node_demote_from_mc(node.name)
3909 self.LogWarning("Node failed to demote itself: %s" % msg)
3911 if self.op.drained is not None:
3912 node.drained = self.op.drained
3913 result.append(("drained", str(self.op.drained)))
3914 if self.op.drained == True:
3915 if node.master_candidate:
3916 node.master_candidate = False
3918 result.append(("master_candidate", "auto-demotion due to drain"))
3919 rrc = self.rpc.call_node_demote_from_mc(node.name)
3922 self.LogWarning("Node failed to demote itself: %s" % msg)
3924 node.offline = False
3925 result.append(("offline", "clear offline status due to drain"))
3927 # we locked all nodes, so adjust the candidate pool before updating this node
3929 _AdjustCandidatePool(self, [node.name])
3931 # this will trigger configuration file update, if needed
3932 self.cfg.Update(node, feedback_fn)
3934 # this will trigger job queue propagation or cleanup
3936 self.context.ReaddNode(node)
3941 class LUPowercycleNode(NoHooksLU):
3942 """Powercycles a node.
3946 ("node_name", _TNonEmptyString),
3951 def CheckArguments(self):
3952 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3953 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3954 raise errors.OpPrereqError("The node is the master and the force"
3955 " parameter was not set",
3958 def ExpandNames(self):
3959 """Locking for PowercycleNode.
3961 This is a last-resort option and shouldn't block on other
3962 jobs. Therefore, we grab no locks.
3965 self.needed_locks = {}
3967 def Exec(self, feedback_fn):
3971 result = self.rpc.call_node_powercycle(self.op.node_name,
3972 self.cfg.GetHypervisorType())
3973 result.Raise("Failed to schedule the reboot")
3974 return result.payload
3977 class LUQueryClusterInfo(NoHooksLU):
3978 """Query cluster configuration.
3984 def ExpandNames(self):
3985 self.needed_locks = {}
3987 def Exec(self, feedback_fn):
3988 """Return cluster config.
3991 cluster = self.cfg.GetClusterInfo()
3994 # Filter just for enabled hypervisors
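# (illustrative) the result maps OS name -> hypervisor -> parameters, keeping
# only hypervisors enabled on the cluster, e.g.
#   {"debian-image": {"xen-pvm": {...}}}
# the names above are hypothetical.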
3995 for os_name, hv_dict in cluster.os_hvp.items():
3996 os_hvp[os_name] = {}
3997 for hv_name, hv_params in hv_dict.items():
3998 if hv_name in cluster.enabled_hypervisors:
3999 os_hvp[os_name][hv_name] = hv_params
4002 "software_version": constants.RELEASE_VERSION,
4003 "protocol_version": constants.PROTOCOL_VERSION,
4004 "config_version": constants.CONFIG_VERSION,
4005 "os_api_version": max(constants.OS_API_VERSIONS),
4006 "export_version": constants.EXPORT_VERSION,
4007 "architecture": (platform.architecture()[0], platform.machine()),
4008 "name": cluster.cluster_name,
4009 "master": cluster.master_node,
4010 "default_hypervisor": cluster.enabled_hypervisors[0],
4011 "enabled_hypervisors": cluster.enabled_hypervisors,
4012 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4013 for hypervisor_name in cluster.enabled_hypervisors]),
4015 "beparams": cluster.beparams,
4016 "osparams": cluster.osparams,
4017 "nicparams": cluster.nicparams,
4018 "candidate_pool_size": cluster.candidate_pool_size,
4019 "master_netdev": cluster.master_netdev,
4020 "volume_group_name": cluster.volume_group_name,
4021 "file_storage_dir": cluster.file_storage_dir,
4022 "maintain_node_health": cluster.maintain_node_health,
4023 "ctime": cluster.ctime,
4024 "mtime": cluster.mtime,
4025 "uuid": cluster.uuid,
4026 "tags": list(cluster.GetTags()),
4027 "uid_pool": cluster.uid_pool,
4033 class LUQueryConfigValues(NoHooksLU):
4034 """Return configuration values.
4039 _FIELDS_DYNAMIC = utils.FieldSet()
4040 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4043 def CheckArguments(self):
4044 _CheckOutputFields(static=self._FIELDS_STATIC,
4045 dynamic=self._FIELDS_DYNAMIC,
4046 selected=self.op.output_fields)
4048 def ExpandNames(self):
4049 self.needed_locks = {}
4051 def Exec(self, feedback_fn):
4052 """Dump a representation of the cluster config to the standard output.
4056 for field in self.op.output_fields:
4057 if field == "cluster_name":
4058 entry = self.cfg.GetClusterName()
4059 elif field == "master_node":
4060 entry = self.cfg.GetMasterNode()
4061 elif field == "drain_flag":
4062 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4063 elif field == "watcher_pause":
4064 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4066 raise errors.ParameterError(field)
4067 values.append(entry)
4071 class LUActivateInstanceDisks(NoHooksLU):
4072 """Bring up an instance's disks.
4075 _OP_REQP = [("instance_name", _TNonEmptyString)]
4076 _OP_DEFS = [("ignore_size", False)]
4079 def ExpandNames(self):
4080 self._ExpandAndLockInstance()
4081 self.needed_locks[locking.LEVEL_NODE] = []
4082 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4084 def DeclareLocks(self, level):
4085 if level == locking.LEVEL_NODE:
4086 self._LockInstancesNodes()
4088 def CheckPrereq(self):
4089 """Check prerequisites.
4091 This checks that the instance is in the cluster.
4094 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4095 assert self.instance is not None, \
4096 "Cannot retrieve locked instance %s" % self.op.instance_name
4097 _CheckNodeOnline(self, self.instance.primary_node)
4099 def Exec(self, feedback_fn):
4100 """Activate the disks.
4103 disks_ok, disks_info = \
4104 _AssembleInstanceDisks(self, self.instance,
4105 ignore_size=self.op.ignore_size)
4107 raise errors.OpExecError("Cannot activate block devices")
4112 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4114 """Prepare the block devices for an instance.
4116 This sets up the block devices on all nodes.
4118 @type lu: L{LogicalUnit}
4119 @param lu: the logical unit on whose behalf we execute
4120 @type instance: L{objects.Instance}
4121 @param instance: the instance for whose disks we assemble
4122 @type disks: list of L{objects.Disk} or None
4123 @param disks: which disks to assemble (or all, if None)
4124 @type ignore_secondaries: boolean
4125 @param ignore_secondaries: if true, errors on secondary nodes
4126 won't result in an error return from the function
4127 @type ignore_size: boolean
4128 @param ignore_size: if true, the current known size of the disk
4129 will not be used during the disk activation, useful for cases
4130 when the size is wrong
4131 @return: False if the operation failed, otherwise a list of
4132 (host, instance_visible_name, node_visible_name)
4133 with the mapping from node devices to instance devices
4138 iname = instance.name
4139 disks = _ExpandCheckDisks(instance, disks)
4141 # With the two-pass mechanism we try to reduce the window of
4142 # opportunity for the race condition of switching DRBD to primary
4143 # before the handshake has occurred, but we do not eliminate it
4145 # The proper fix would be to wait (with some limits) until the
4146 # connection has been made and drbd transitions from WFConnection
4147 # into any other network-connected state (Connected, SyncTarget,
4150 # 1st pass, assemble on all nodes in secondary mode
4151 for inst_disk in disks:
4152 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4154 node_disk = node_disk.Copy()
4155 node_disk.UnsetSize()
4156 lu.cfg.SetDiskID(node_disk, node)
4157 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4158 msg = result.fail_msg
4160 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4161 " (is_primary=False, pass=1): %s",
4162 inst_disk.iv_name, node, msg)
4163 if not ignore_secondaries:
4166 # FIXME: race condition on drbd migration to primary
4168 # 2nd pass, do only the primary node
4169 for inst_disk in disks:
4172 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4173 if node != instance.primary_node:
4176 node_disk = node_disk.Copy()
4177 node_disk.UnsetSize()
4178 lu.cfg.SetDiskID(node_disk, node)
4179 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4180 msg = result.fail_msg
4182 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4183 " (is_primary=True, pass=2): %s",
4184 inst_disk.iv_name, node, msg)
4187 dev_path = result.payload
4189 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
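# each entry is (node, instance-visible disk name, device path); an
# illustrative value would be ("node1.example.com", "disk/0", "/dev/drbd0")
# (hypothetical names)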
4191 # leave the disks configured for the primary node
4192 # this is a workaround that would be better fixed by
4193 # improving the logical/physical id handling
4195 lu.cfg.SetDiskID(disk, instance.primary_node)
4197 return disks_ok, device_info
4200 def _StartInstanceDisks(lu, instance, force):
4201 """Start the disks of an instance.
4204 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4205 ignore_secondaries=force)
4207 _ShutdownInstanceDisks(lu, instance)
4208 if force is not None and not force:
4209 lu.proc.LogWarning("", hint="If the message above refers to a"
4211 " you can retry the operation using '--force'.")
4212 raise errors.OpExecError("Disk consistency error")
4215 class LUDeactivateInstanceDisks(NoHooksLU):
4216 """Shutdown an instance's disks.
4219 _OP_REQP = [("instance_name", _TNonEmptyString)]
4222 def ExpandNames(self):
4223 self._ExpandAndLockInstance()
4224 self.needed_locks[locking.LEVEL_NODE] = []
4225 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4227 def DeclareLocks(self, level):
4228 if level == locking.LEVEL_NODE:
4229 self._LockInstancesNodes()
4231 def CheckPrereq(self):
4232 """Check prerequisites.
4234 This checks that the instance is in the cluster.
4237 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4238 assert self.instance is not None, \
4239 "Cannot retrieve locked instance %s" % self.op.instance_name
4241 def Exec(self, feedback_fn):
4242 """Deactivate the disks
4245 instance = self.instance
4246 _SafeShutdownInstanceDisks(self, instance)
4249 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4250 """Shutdown block devices of an instance.
4252 This function checks that the instance is not running, before calling
4253 _ShutdownInstanceDisks.
4256 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4257 _ShutdownInstanceDisks(lu, instance, disks=disks)
4260 def _ExpandCheckDisks(instance, disks):
4261 """Return the instance disks selected by the disks list
4263 @type disks: list of L{objects.Disk} or None
4264 @param disks: selected disks
4265 @rtype: list of L{objects.Disk}
4266 @return: selected instance disks to act on
4270 return instance.disks
4272 if not set(disks).issubset(instance.disks):
4273 raise errors.ProgrammerError("Can only act on disks belonging to the"
4278 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4279 """Shutdown block devices of an instance.
4281 This does the shutdown on all nodes of the instance.
4283 Errors on the primary node are ignored only if ignore_primary is true.
4288 disks = _ExpandCheckDisks(instance, disks)
4291 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4292 lu.cfg.SetDiskID(top_disk, node)
4293 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4294 msg = result.fail_msg
4296 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4297 disk.iv_name, node, msg)
4298 if not ignore_primary or node != instance.primary_node:
4303 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4304 """Checks if a node has enough free memory.
4306 This function checks if a given node has the needed amount of free
4307 memory. In case the node has less memory or we cannot get the
4308 information from the node, this function raises an OpPrereqError
4311 @type lu: C{LogicalUnit}
4312 @param lu: a logical unit from which we get configuration data
4314 @param node: the node to check
4315 @type reason: C{str}
4316 @param reason: string to use in the error message
4317 @type requested: C{int}
4318 @param requested: the amount of memory in MiB to check for
4319 @type hypervisor_name: C{str}
4320 @param hypervisor_name: the hypervisor to ask for memory stats
4321 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4322 we cannot check the node
4325 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4326 nodeinfo[node].Raise("Can't get data from node %s" % node,
4327 prereq=True, ecode=errors.ECODE_ENVIRON)
4328 free_mem = nodeinfo[node].payload.get('memory_free', None)
4329 if not isinstance(free_mem, int):
4330 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4331 " was '%s'" % (node, free_mem),
4332 errors.ECODE_ENVIRON)
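# both values are in MiB; e.g. (illustrative) requesting 4096 MiB when the
# node reports only 2048 MiB free triggers the error below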
4333 if requested > free_mem:
4334 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4335 " needed %s MiB, available %s MiB" %
4336 (node, reason, requested, free_mem),
4340 def _CheckNodesFreeDisk(lu, nodenames, requested):
4341 """Checks if nodes have enough free disk space in the default VG.
4343 This function checks if all given nodes have the needed amount of
4344 free disk. In case any node has less disk or we cannot get the
4345 information from the node, this function raises an OpPrereqError
4348 @type lu: C{LogicalUnit}
4349 @param lu: a logical unit from which we get configuration data
4350 @type nodenames: C{list}
4351 @param nodenames: the list of node names to check
4352 @type requested: C{int}
4353 @param requested: the amount of disk in MiB to check for
4354 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4355 we cannot check the node
4358 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4359 lu.cfg.GetHypervisorType())
4360 for node in nodenames:
4361 info = nodeinfo[node]
4362 info.Raise("Cannot get current information from node %s" % node,
4363 prereq=True, ecode=errors.ECODE_ENVIRON)
4364 vg_free = info.payload.get("vg_free", None)
4365 if not isinstance(vg_free, int):
4366 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4367 " result was '%s'" % (node, vg_free),
4368 errors.ECODE_ENVIRON)
4369 if requested > vg_free:
4370 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4371 " required %d MiB, available %d MiB" %
4372 (node, requested, vg_free),
4376 class LUStartupInstance(LogicalUnit):
4377 """Starts an instance.
4380 HPATH = "instance-start"
4381 HTYPE = constants.HTYPE_INSTANCE
4383 ("instance_name", _TNonEmptyString),
4385 ("beparams", _TDict),
4386 ("hvparams", _TDict),
4389 ("beparams", _EmptyDict),
4390 ("hvparams", _EmptyDict),
4394 def CheckArguments(self):
4396 if self.op.beparams:
4397 # fill the beparams dict
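# (illustrative) a one-off override such as {"memory": 2048} is type-checked
# and coerced here; it applies only to this start, not to the stored config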
4398 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4400 def ExpandNames(self):
4401 self._ExpandAndLockInstance()
4403 def BuildHooksEnv(self):
4406 This runs on master, primary and secondary nodes of the instance.
4410 "FORCE": self.op.force,
4412 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4413 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4416 def CheckPrereq(self):
4417 """Check prerequisites.
4419 This checks that the instance is in the cluster.
4422 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4423 assert self.instance is not None, \
4424 "Cannot retrieve locked instance %s" % self.op.instance_name
4427 if self.op.hvparams:
4428 # check hypervisor parameter syntax (locally)
4429 cluster = self.cfg.GetClusterInfo()
4430 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4431 filled_hvp = cluster.FillHV(instance)
4432 filled_hvp.update(self.op.hvparams)
4433 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4434 hv_type.CheckParameterSyntax(filled_hvp)
4435 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
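# note: the temporary hvparams are layered on top of the cluster-filled
# values, checked syntactically here and then validated on the instance's
# nodes as well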
4437 _CheckNodeOnline(self, instance.primary_node)
4439 bep = self.cfg.GetClusterInfo().FillBE(instance)
4440 # check bridges existence
4441 _CheckInstanceBridgesExist(self, instance)
4443 remote_info = self.rpc.call_instance_info(instance.primary_node,
4445 instance.hypervisor)
4446 remote_info.Raise("Error checking node %s" % instance.primary_node,
4447 prereq=True, ecode=errors.ECODE_ENVIRON)
4448 if not remote_info.payload: # not running already
4449 _CheckNodeFreeMemory(self, instance.primary_node,
4450 "starting instance %s" % instance.name,
4451 bep[constants.BE_MEMORY], instance.hypervisor)
4453 def Exec(self, feedback_fn):
4454 """Start the instance.
4457 instance = self.instance
4458 force = self.op.force
4460 self.cfg.MarkInstanceUp(instance.name)
4462 node_current = instance.primary_node
4464 _StartInstanceDisks(self, instance, force)
4466 result = self.rpc.call_instance_start(node_current, instance,
4467 self.op.hvparams, self.op.beparams)
4468 msg = result.fail_msg
4470 _ShutdownInstanceDisks(self, instance)
4471 raise errors.OpExecError("Could not start instance: %s" % msg)
4474 class LURebootInstance(LogicalUnit):
4475 """Reboot an instance.
4478 HPATH = "instance-reboot"
4479 HTYPE = constants.HTYPE_INSTANCE
4481 ("instance_name", _TNonEmptyString),
4482 ("ignore_secondaries", _TBool),
4483 ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4485 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4488 def ExpandNames(self):
4489 self._ExpandAndLockInstance()
4491 def BuildHooksEnv(self):
4494 This runs on master, primary and secondary nodes of the instance.
4498 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4499 "REBOOT_TYPE": self.op.reboot_type,
4500 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4502 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4503 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4506 def CheckPrereq(self):
4507 """Check prerequisites.
4509 This checks that the instance is in the cluster.
4512 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4513 assert self.instance is not None, \
4514 "Cannot retrieve locked instance %s" % self.op.instance_name
4516 _CheckNodeOnline(self, instance.primary_node)
4518 # check bridges existence
4519 _CheckInstanceBridgesExist(self, instance)
4521 def Exec(self, feedback_fn):
4522 """Reboot the instance.
4525 instance = self.instance
4526 ignore_secondaries = self.op.ignore_secondaries
4527 reboot_type = self.op.reboot_type
4529 node_current = instance.primary_node
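# soft/hard reboots are delegated to the hypervisor on the primary node;
# a full reboot is implemented below as shutdown + disk re-assembly + start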
4531 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4532 constants.INSTANCE_REBOOT_HARD]:
4533 for disk in instance.disks:
4534 self.cfg.SetDiskID(disk, node_current)
4535 result = self.rpc.call_instance_reboot(node_current, instance,
4537 self.op.shutdown_timeout)
4538 result.Raise("Could not reboot instance")
4540 result = self.rpc.call_instance_shutdown(node_current, instance,
4541 self.op.shutdown_timeout)
4542 result.Raise("Could not shutdown instance for full reboot")
4543 _ShutdownInstanceDisks(self, instance)
4544 _StartInstanceDisks(self, instance, ignore_secondaries)
4545 result = self.rpc.call_instance_start(node_current, instance, None, None)
4546 msg = result.fail_msg
4548 _ShutdownInstanceDisks(self, instance)
4549 raise errors.OpExecError("Could not start instance for"
4550 " full reboot: %s" % msg)
4552 self.cfg.MarkInstanceUp(instance.name)
4555 class LUShutdownInstance(LogicalUnit):
4556 """Shutdown an instance.
4559 HPATH = "instance-stop"
4560 HTYPE = constants.HTYPE_INSTANCE
4561 _OP_REQP = [("instance_name", _TNonEmptyString)]
4562 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4565 def ExpandNames(self):
4566 self._ExpandAndLockInstance()
4568 def BuildHooksEnv(self):
4571 This runs on master, primary and secondary nodes of the instance.
4574 env = _BuildInstanceHookEnvByObject(self, self.instance)
4575 env["TIMEOUT"] = self.op.timeout
4576 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4579 def CheckPrereq(self):
4580 """Check prerequisites.
4582 This checks that the instance is in the cluster.
4585 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4586 assert self.instance is not None, \
4587 "Cannot retrieve locked instance %s" % self.op.instance_name
4588 _CheckNodeOnline(self, self.instance.primary_node)
4590 def Exec(self, feedback_fn):
4591 """Shutdown the instance.
4594 instance = self.instance
4595 node_current = instance.primary_node
4596 timeout = self.op.timeout
4597 self.cfg.MarkInstanceDown(instance.name)
4598 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4599 msg = result.fail_msg
4601 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4603 _ShutdownInstanceDisks(self, instance)
4606 class LUReinstallInstance(LogicalUnit):
4607 """Reinstall an instance.
4610 HPATH = "instance-reinstall"
4611 HTYPE = constants.HTYPE_INSTANCE
4612 _OP_REQP = [("instance_name", _TNonEmptyString)]
4615 ("force_variant", False),
4619 def ExpandNames(self):
4620 self._ExpandAndLockInstance()
4622 def BuildHooksEnv(self):
4625 This runs on master, primary and secondary nodes of the instance.
4628 env = _BuildInstanceHookEnvByObject(self, self.instance)
4629 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4632 def CheckPrereq(self):
4633 """Check prerequisites.
4635 This checks that the instance is in the cluster and is not running.
4638 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4639 assert instance is not None, \
4640 "Cannot retrieve locked instance %s" % self.op.instance_name
4641 _CheckNodeOnline(self, instance.primary_node)
4643 if instance.disk_template == constants.DT_DISKLESS:
4644 raise errors.OpPrereqError("Instance '%s' has no disks" %
4645 self.op.instance_name,
4647 _CheckInstanceDown(self, instance, "cannot reinstall")
4649 if self.op.os_type is not None:
4651 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4652 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4654 self.instance = instance
4656 def Exec(self, feedback_fn):
4657 """Reinstall the instance.
4660 inst = self.instance
4662 if self.op.os_type is not None:
4663 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4664 inst.os = self.op.os_type
4665 self.cfg.Update(inst, feedback_fn)
4667 _StartInstanceDisks(self, inst, None)
4669 feedback_fn("Running the instance OS create scripts...")
4670 # FIXME: pass debug option from opcode to backend
4671 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4672 self.op.debug_level)
4673 result.Raise("Could not install OS for instance %s on node %s" %
4674 (inst.name, inst.primary_node))
4676 _ShutdownInstanceDisks(self, inst)
4679 class LURecreateInstanceDisks(LogicalUnit):
4680 """Recreate an instance's missing disks.
4683 HPATH = "instance-recreate-disks"
4684 HTYPE = constants.HTYPE_INSTANCE
4686 ("instance_name", _TNonEmptyString),
4687 ("disks", _TListOf(_TPositiveInt)),
4691 def ExpandNames(self):
4692 self._ExpandAndLockInstance()
4694 def BuildHooksEnv(self):
4697 This runs on master, primary and secondary nodes of the instance.
4700 env = _BuildInstanceHookEnvByObject(self, self.instance)
4701 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4704 def CheckPrereq(self):
4705 """Check prerequisites.
4707 This checks that the instance is in the cluster and is not running.
4710 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4711 assert instance is not None, \
4712 "Cannot retrieve locked instance %s" % self.op.instance_name
4713 _CheckNodeOnline(self, instance.primary_node)
4715 if instance.disk_template == constants.DT_DISKLESS:
4716 raise errors.OpPrereqError("Instance '%s' has no disks" %
4717 self.op.instance_name, errors.ECODE_INVAL)
4718 _CheckInstanceDown(self, instance, "cannot recreate disks")
4720 if not self.op.disks:
4721 self.op.disks = range(len(instance.disks))
4723 for idx in self.op.disks:
4724 if idx >= len(instance.disks):
4725 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4728 self.instance = instance
4730 def Exec(self, feedback_fn):
4731 """Recreate the disks.
4735 for idx, _ in enumerate(self.instance.disks):
4736 if idx not in self.op.disks: # disk idx has not been passed in
4740 _CreateDisks(self, self.instance, to_skip=to_skip)
4743 class LURenameInstance(LogicalUnit):
4744 """Rename an instance.
4747 HPATH = "instance-rename"
4748 HTYPE = constants.HTYPE_INSTANCE
4750 ("instance_name", _TNonEmptyString),
4751 ("new_name", _TNonEmptyString),
4753 _OP_DEFS = [("ignore_ip", False)]
4755 def BuildHooksEnv(self):
4758 This runs on master, primary and secondary nodes of the instance.
4761 env = _BuildInstanceHookEnvByObject(self, self.instance)
4762 env["INSTANCE_NEW_NAME"] = self.op.new_name
4763 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4766 def CheckPrereq(self):
4767 """Check prerequisites.
4769 This checks that the instance is in the cluster and is not running.
4772 self.op.instance_name = _ExpandInstanceName(self.cfg,
4773 self.op.instance_name)
4774 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4775 assert instance is not None
4776 _CheckNodeOnline(self, instance.primary_node)
4777 _CheckInstanceDown(self, instance, "cannot rename")
4778 self.instance = instance
4780 # new name verification
4781 name_info = utils.GetHostInfo(self.op.new_name)
4783 self.op.new_name = new_name = name_info.name
4784 instance_list = self.cfg.GetInstanceList()
4785 if new_name in instance_list:
4786 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4787 new_name, errors.ECODE_EXISTS)
4789 if not self.op.ignore_ip:
4790 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4791 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4792 (name_info.ip, new_name),
4793 errors.ECODE_NOTUNIQUE)
4795 def Exec(self, feedback_fn):
4796 """Reinstall the instance.
4799 inst = self.instance
4800 old_name = inst.name
4802 if inst.disk_template == constants.DT_FILE:
4803 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4805 self.cfg.RenameInstance(inst.name, self.op.new_name)
4806 # Change the instance lock. This is definitely safe while we hold the BGL
4807 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4808 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4810 # re-read the instance from the configuration after rename
4811 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4813 if inst.disk_template == constants.DT_FILE:
4814 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4815 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4816 old_file_storage_dir,
4817 new_file_storage_dir)
4818 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4819 " (but the instance has been renamed in Ganeti)" %
4820 (inst.primary_node, old_file_storage_dir,
4821 new_file_storage_dir))
4823 _StartInstanceDisks(self, inst, None)
4825 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4826 old_name, self.op.debug_level)
4827 msg = result.fail_msg
4829 msg = ("Could not run OS rename script for instance %s on node %s"
4830 " (but the instance has been renamed in Ganeti): %s" %
4831 (inst.name, inst.primary_node, msg))
4832 self.proc.LogWarning(msg)
4834 _ShutdownInstanceDisks(self, inst)
4837 class LURemoveInstance(LogicalUnit):
4838 """Remove an instance.
4841 HPATH = "instance-remove"
4842 HTYPE = constants.HTYPE_INSTANCE
4844 ("instance_name", _TNonEmptyString),
4845 ("ignore_failures", _TBool),
4847 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4850 def ExpandNames(self):
4851 self._ExpandAndLockInstance()
4852 self.needed_locks[locking.LEVEL_NODE] = []
4853 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4855 def DeclareLocks(self, level):
4856 if level == locking.LEVEL_NODE:
4857 self._LockInstancesNodes()
4859 def BuildHooksEnv(self):
4862 This runs on master, primary and secondary nodes of the instance.
4865 env = _BuildInstanceHookEnvByObject(self, self.instance)
4866 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4867 nl = [self.cfg.GetMasterNode()]
4868 nl_post = list(self.instance.all_nodes) + nl
4869 return env, nl, nl_post
4871 def CheckPrereq(self):
4872 """Check prerequisites.
4874 This checks that the instance is in the cluster.
4877 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4878 assert self.instance is not None, \
4879 "Cannot retrieve locked instance %s" % self.op.instance_name
4881 def Exec(self, feedback_fn):
4882 """Remove the instance.
4885 instance = self.instance
4886 logging.info("Shutting down instance %s on node %s",
4887 instance.name, instance.primary_node)
4889 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4890 self.op.shutdown_timeout)
4891 msg = result.fail_msg
4893 if self.op.ignore_failures:
4894 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4896 raise errors.OpExecError("Could not shutdown instance %s on"
4898 (instance.name, instance.primary_node, msg))
4900 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4903 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4904 """Utility function to remove an instance.
4907 logging.info("Removing block devices for instance %s", instance.name)
4909 if not _RemoveDisks(lu, instance):
4910 if not ignore_failures:
4911 raise errors.OpExecError("Can't remove instance's disks")
4912 feedback_fn("Warning: can't remove instance's disks")
4914 logging.info("Removing instance %s out of cluster config", instance.name)
4916 lu.cfg.RemoveInstance(instance.name)
4918 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4919 "Instance lock removal conflict"
4921 # Remove lock for the instance
4922 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4925 class LUQueryInstances(NoHooksLU):
4926 """Logical unit for querying instances.
4929 # pylint: disable-msg=W0142
4931 ("output_fields", _TListOf(_TNonEmptyString)),
4932 ("names", _TListOf(_TNonEmptyString)),
4933 ("use_locking", _TBool),
4936 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4937 "serial_no", "ctime", "mtime", "uuid"]
4938 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4940 "disk_template", "ip", "mac", "bridge",
4941 "nic_mode", "nic_link",
4942 "sda_size", "sdb_size", "vcpus", "tags",
4943 "network_port", "beparams",
4944 r"(disk)\.(size)/([0-9]+)",
4945 r"(disk)\.(sizes)", "disk_usage",
4946 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4947 r"(nic)\.(bridge)/([0-9]+)",
4948 r"(nic)\.(macs|ips|modes|links|bridges)",
4949 r"(disk|nic)\.(count)",
4951 ] + _SIMPLE_FIELDS +
4953 for name in constants.HVS_PARAMETERS
4954 if name not in constants.HVC_GLOBALS] +
4956 for name in constants.BES_PARAMETERS])
4957 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
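# static fields can be answered from the configuration alone; dynamic fields
# (oper_state, oper_ram, status) need live data from the nodes, which is why
# do_node_query/do_locking are computed in ExpandNames below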
4960 def CheckArguments(self):
4961 _CheckOutputFields(static=self._FIELDS_STATIC,
4962 dynamic=self._FIELDS_DYNAMIC,
4963 selected=self.op.output_fields)
4965 def ExpandNames(self):
4966 self.needed_locks = {}
4967 self.share_locks[locking.LEVEL_INSTANCE] = 1
4968 self.share_locks[locking.LEVEL_NODE] = 1
4971 self.wanted = _GetWantedInstances(self, self.op.names)
4973 self.wanted = locking.ALL_SET
4975 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4976 self.do_locking = self.do_node_query and self.op.use_locking
4978 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4979 self.needed_locks[locking.LEVEL_NODE] = []
4980 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4982 def DeclareLocks(self, level):
4983 if level == locking.LEVEL_NODE and self.do_locking:
4984 self._LockInstancesNodes()
4986 def Exec(self, feedback_fn):
4987 """Computes the list of nodes and their attributes.
4990 # pylint: disable-msg=R0912
4991 # way too many branches here
4992 all_info = self.cfg.GetAllInstancesInfo()
4993 if self.wanted == locking.ALL_SET:
4994 # caller didn't specify instance names, so ordering is not important
4996 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4998 instance_names = all_info.keys()
4999 instance_names = utils.NiceSort(instance_names)
5001 # caller did specify names, so we must keep the ordering
5003 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5005 tgt_set = all_info.keys()
5006 missing = set(self.wanted).difference(tgt_set)
5008 raise errors.OpExecError("Some instances were removed before"
5009 " retrieving their data: %s" % missing)
5010 instance_names = self.wanted
5012 instance_list = [all_info[iname] for iname in instance_names]
5014 # begin data gathering
5016 nodes = frozenset([inst.primary_node for inst in instance_list])
5017 hv_list = list(set([inst.hypervisor for inst in instance_list]))
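# live data is collected per primary node and hypervisor; nodes that are
# offline or unreachable are tracked separately (off_nodes/bad_nodes below)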
5021 if self.do_node_query:
5023 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5025 result = node_data[name]
5027 # offline nodes will be in both lists
5028 off_nodes.append(name)
5030 bad_nodes.append(name)
5033 live_data.update(result.payload)
5034 # else no instance is alive
5036 live_data = dict([(name, {}) for name in instance_names])
5038 # end data gathering
5043 cluster = self.cfg.GetClusterInfo()
5044 for instance in instance_list:
5046 i_hv = cluster.FillHV(instance, skip_globals=True)
5047 i_be = cluster.FillBE(instance)
5048 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5049 for field in self.op.output_fields:
5050 st_match = self._FIELDS_STATIC.Matches(field)
5051 if field in self._SIMPLE_FIELDS:
5052 val = getattr(instance, field)
5053 elif field == "pnode":
5054 val = instance.primary_node
5055 elif field == "snodes":
5056 val = list(instance.secondary_nodes)
5057 elif field == "admin_state":
5058 val = instance.admin_up
5059 elif field == "oper_state":
5060 if instance.primary_node in bad_nodes:
5063 val = bool(live_data.get(instance.name))
5064 elif field == "status":
5065 if instance.primary_node in off_nodes:
5066 val = "ERROR_nodeoffline"
5067 elif instance.primary_node in bad_nodes:
5068 val = "ERROR_nodedown"
5070 running = bool(live_data.get(instance.name))
5072 if instance.admin_up:
5077 if instance.admin_up:
5081 elif field == "oper_ram":
5082 if instance.primary_node in bad_nodes:
5084 elif instance.name in live_data:
5085 val = live_data[instance.name].get("memory", "?")
5088 elif field == "vcpus":
5089 val = i_be[constants.BE_VCPUS]
5090 elif field == "disk_template":
5091 val = instance.disk_template
5094 val = instance.nics[0].ip
5097 elif field == "nic_mode":
5099 val = i_nicp[0][constants.NIC_MODE]
5102 elif field == "nic_link":
5104 val = i_nicp[0][constants.NIC_LINK]
5107 elif field == "bridge":
5108 if (instance.nics and
5109 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5110 val = i_nicp[0][constants.NIC_LINK]
5113 elif field == "mac":
5115 val = instance.nics[0].mac
5118 elif field == "sda_size" or field == "sdb_size":
5119 idx = ord(field[2]) - ord('a')
5121 val = instance.FindDisk(idx).size
5122 except errors.OpPrereqError:
5124 elif field == "disk_usage": # total disk usage per node
5125 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5126 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5127 elif field == "tags":
5128 val = list(instance.GetTags())
5129 elif field == "hvparams":
5131 elif (field.startswith(HVPREFIX) and
5132 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5133 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5134 val = i_hv.get(field[len(HVPREFIX):], None)
5135 elif field == "beparams":
5137 elif (field.startswith(BEPREFIX) and
5138 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5139 val = i_be.get(field[len(BEPREFIX):], None)
5140 elif st_match and st_match.groups():
5141 # matches a variable list
5142 st_groups = st_match.groups()
5143 if st_groups and st_groups[0] == "disk":
5144 if st_groups[1] == "count":
5145 val = len(instance.disks)
5146 elif st_groups[1] == "sizes":
5147 val = [disk.size for disk in instance.disks]
5148 elif st_groups[1] == "size":
5150 val = instance.FindDisk(st_groups[2]).size
5151 except errors.OpPrereqError:
5154 assert False, "Unhandled disk parameter"
5155 elif st_groups[0] == "nic":
5156 if st_groups[1] == "count":
5157 val = len(instance.nics)
5158 elif st_groups[1] == "macs":
5159 val = [nic.mac for nic in instance.nics]
5160 elif st_groups[1] == "ips":
5161 val = [nic.ip for nic in instance.nics]
5162 elif st_groups[1] == "modes":
5163 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5164 elif st_groups[1] == "links":
5165 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5166 elif st_groups[1] == "bridges":
5169 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5170 val.append(nicp[constants.NIC_LINK])
5175 nic_idx = int(st_groups[2])
5176 if nic_idx >= len(instance.nics):
5179 if st_groups[1] == "mac":
5180 val = instance.nics[nic_idx].mac
5181 elif st_groups[1] == "ip":
5182 val = instance.nics[nic_idx].ip
5183 elif st_groups[1] == "mode":
5184 val = i_nicp[nic_idx][constants.NIC_MODE]
5185 elif st_groups[1] == "link":
5186 val = i_nicp[nic_idx][constants.NIC_LINK]
5187 elif st_groups[1] == "bridge":
5188 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5189 if nic_mode == constants.NIC_MODE_BRIDGED:
5190 val = i_nicp[nic_idx][constants.NIC_LINK]
5194 assert False, "Unhandled NIC parameter"
5196 assert False, ("Declared but unhandled variable parameter '%s'" %
5199 assert False, "Declared but unhandled parameter '%s'" % field
5206 class LUFailoverInstance(LogicalUnit):
5207 """Failover an instance.
5210 HPATH = "instance-failover"
5211 HTYPE = constants.HTYPE_INSTANCE
5213 ("instance_name", _TNonEmptyString),
5214 ("ignore_consistency", _TBool),
5216 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5219 def ExpandNames(self):
5220 self._ExpandAndLockInstance()
5221 self.needed_locks[locking.LEVEL_NODE] = []
5222 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5224 def DeclareLocks(self, level):
5225 if level == locking.LEVEL_NODE:
5226 self._LockInstancesNodes()
5228 def BuildHooksEnv(self):
5231 This runs on master, primary and secondary nodes of the instance.
5234 instance = self.instance
5235 source_node = instance.primary_node
5236 target_node = instance.secondary_nodes[0]
5238 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5239 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5240 "OLD_PRIMARY": source_node,
5241 "OLD_SECONDARY": target_node,
5242 "NEW_PRIMARY": target_node,
5243 "NEW_SECONDARY": source_node,
5245 env.update(_BuildInstanceHookEnvByObject(self, instance))
5246 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5248 nl_post.append(source_node)
5249 return env, nl, nl_post
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5254 This checks that the instance is in the cluster.
5257 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5258 assert self.instance is not None, \
5259 "Cannot retrieve locked instance %s" % self.op.instance_name
5261 bep = self.cfg.GetClusterInfo().FillBE(instance)
5262 if instance.disk_template not in constants.DTS_NET_MIRROR:
5263 raise errors.OpPrereqError("Instance's disk layout is not"
5264 " network mirrored, cannot failover.",
5267 secondary_nodes = instance.secondary_nodes
5268 if not secondary_nodes:
5269 raise errors.ProgrammerError("no secondary node but using "
5270 "a mirrored disk template")
5272 target_node = secondary_nodes[0]
5273 _CheckNodeOnline(self, target_node)
5274 _CheckNodeNotDrained(self, target_node)
5275 if instance.admin_up:
5276 # check memory requirements on the secondary node
5277 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5278 instance.name, bep[constants.BE_MEMORY],
5279 instance.hypervisor)
5281 self.LogInfo("Not checking memory on the secondary node as"
5282 " instance will not be started")
5284 # check bridge existence
5285 _CheckInstanceBridgesExist(self, instance, node=target_node)
5287 def Exec(self, feedback_fn):
5288 """Failover an instance.
5290 The failover is done by shutting it down on its present node and
5291 starting it on the secondary.
5294 instance = self.instance
5296 source_node = instance.primary_node
5297 target_node = instance.secondary_nodes[0]
5299 if instance.admin_up:
5300 feedback_fn("* checking disk consistency between source and target")
5301 for dev in instance.disks:
5302 # for drbd, these are drbd over lvm
5303 if not _CheckDiskConsistency(self, dev, target_node, False):
5304 if not self.op.ignore_consistency:
5305 raise errors.OpExecError("Disk %s is degraded on target node,"
5306 " aborting failover." % dev.iv_name)
5308 feedback_fn("* not checking disk consistency as instance is not running")
5310 feedback_fn("* shutting down instance on source node")
5311 logging.info("Shutting down instance %s on node %s",
5312 instance.name, source_node)
5314 result = self.rpc.call_instance_shutdown(source_node, instance,
5315 self.op.shutdown_timeout)
5316 msg = result.fail_msg
5318 if self.op.ignore_consistency:
5319 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5320 " Proceeding anyway. Please make sure node"
5321 " %s is down. Error details: %s",
5322 instance.name, source_node, source_node, msg)
5324 raise errors.OpExecError("Could not shutdown instance %s on"
5326 (instance.name, source_node, msg))
5328 feedback_fn("* deactivating the instance's disks on source node")
5329 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5330 raise errors.OpExecError("Can't shut down the instance's disks.")
5332 instance.primary_node = target_node
5333 # distribute new instance config to the other nodes
5334 self.cfg.Update(instance, feedback_fn)
5336 # Only start the instance if it's marked as up
5337 if instance.admin_up:
5338 feedback_fn("* activating the instance's disks on target node")
5339 logging.info("Starting instance %s on node %s",
5340 instance.name, target_node)
5342 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5343 ignore_secondaries=True)
5345 _ShutdownInstanceDisks(self, instance)
5346 raise errors.OpExecError("Can't activate the instance's disks")
5348 feedback_fn("* starting the instance on the target node")
5349 result = self.rpc.call_instance_start(target_node, instance, None, None)
5350 msg = result.fail_msg
5352 _ShutdownInstanceDisks(self, instance)
5353 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5354 (instance.name, target_node, msg))
5357 class LUMigrateInstance(LogicalUnit):
5358 """Migrate an instance.
5360 This is migration without shutting down, compared to the failover,
5361 which is done with shutdown.
5364 HPATH = "instance-migrate"
5365 HTYPE = constants.HTYPE_INSTANCE
5367 ("instance_name", _TNonEmptyString),
5369 ("cleanup", _TBool),
5374 def ExpandNames(self):
5375 self._ExpandAndLockInstance()
5377 self.needed_locks[locking.LEVEL_NODE] = []
5378 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5380 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5381 self.op.live, self.op.cleanup)
5382 self.tasklets = [self._migrater]
5384 def DeclareLocks(self, level):
5385 if level == locking.LEVEL_NODE:
5386 self._LockInstancesNodes()
5388 def BuildHooksEnv(self):
5391 This runs on master, primary and secondary nodes of the instance.
5394 instance = self._migrater.instance
5395 source_node = instance.primary_node
5396 target_node = instance.secondary_nodes[0]
5397 env = _BuildInstanceHookEnvByObject(self, instance)
5398 env["MIGRATE_LIVE"] = self.op.live
5399 env["MIGRATE_CLEANUP"] = self.op.cleanup
5401 "OLD_PRIMARY": source_node,
5402 "OLD_SECONDARY": target_node,
5403 "NEW_PRIMARY": target_node,
5404 "NEW_SECONDARY": source_node,
5406 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5408 nl_post.append(source_node)
5409 return env, nl, nl_post
5412 class LUMoveInstance(LogicalUnit):
5413 """Move an instance by data-copying.
5416 HPATH = "instance-move"
5417 HTYPE = constants.HTYPE_INSTANCE
5419 ("instance_name", _TNonEmptyString),
5420 ("target_node", _TNonEmptyString),
5422 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5425 def ExpandNames(self):
5426 self._ExpandAndLockInstance()
5427 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5428 self.op.target_node = target_node
5429 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5430 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5432 def DeclareLocks(self, level):
5433 if level == locking.LEVEL_NODE:
5434 self._LockInstancesNodes(primary_only=True)
5436 def BuildHooksEnv(self):
5439 This runs on master, primary and secondary nodes of the instance.
5443 "TARGET_NODE": self.op.target_node,
5444 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5446 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5447 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5448 self.op.target_node]
5451 def CheckPrereq(self):
5452 """Check prerequisites.
5454 This checks that the instance is in the cluster.
5457 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5458 assert self.instance is not None, \
5459 "Cannot retrieve locked instance %s" % self.op.instance_name
5461 node = self.cfg.GetNodeInfo(self.op.target_node)
5462 assert node is not None, \
5463 "Cannot retrieve locked node %s" % self.op.target_node
5465 self.target_node = target_node = node.name
5467 if target_node == instance.primary_node:
5468 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5469 (instance.name, target_node),
5472 bep = self.cfg.GetClusterInfo().FillBE(instance)
5474 for idx, dsk in enumerate(instance.disks):
5475 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5476 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5477 " cannot copy" % idx, errors.ECODE_STATE)
5479 _CheckNodeOnline(self, target_node)
5480 _CheckNodeNotDrained(self, target_node)
5482 if instance.admin_up:
5483 # check memory requirements on the target node
5484 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5485 instance.name, bep[constants.BE_MEMORY],
5486 instance.hypervisor)
5488 self.LogInfo("Not checking memory on the target node as"
5489 " instance will not be started")
5491 # check bridge existence
5492 _CheckInstanceBridgesExist(self, instance, node=target_node)
5494 def Exec(self, feedback_fn):
5495 """Move an instance.
5497 The move is done by shutting it down on its present node, copying
5498 the data over (slow) and starting it on the new node.
5501 instance = self.instance
5503 source_node = instance.primary_node
5504 target_node = self.target_node
5506 self.LogInfo("Shutting down instance %s on source node %s",
5507 instance.name, source_node)
5509 result = self.rpc.call_instance_shutdown(source_node, instance,
5510 self.op.shutdown_timeout)
5511 msg = result.fail_msg
5513 if self.op.ignore_consistency:
5514 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5515 " Proceeding anyway. Please make sure node"
5516 " %s is down. Error details: %s",
5517 instance.name, source_node, source_node, msg)
5519 raise errors.OpExecError("Could not shutdown instance %s on"
5521 (instance.name, source_node, msg))
5523 # create the target disks
5525 _CreateDisks(self, instance, target_node=target_node)
5526 except errors.OpExecError:
5527 self.LogWarning("Device creation failed, reverting...")
5529 _RemoveDisks(self, instance, target_node=target_node)
5531 self.cfg.ReleaseDRBDMinors(instance.name)
5534 cluster_name = self.cfg.GetClusterInfo().cluster_name
5537 # activate, get path, copy the data over
5538 for idx, disk in enumerate(instance.disks):
5539 self.LogInfo("Copying data for disk %d", idx)
5540 result = self.rpc.call_blockdev_assemble(target_node, disk,
5541 instance.name, True)
5543 self.LogWarning("Can't assemble newly created disk %d: %s",
5544 idx, result.fail_msg)
5545 errs.append(result.fail_msg)
5547 dev_path = result.payload
5548 result = self.rpc.call_blockdev_export(source_node, disk,
5549 target_node, dev_path,
5552 self.LogWarning("Can't copy data over for disk %d: %s",
5553 idx, result.fail_msg)
5554 errs.append(result.fail_msg)
5558 self.LogWarning("Some disks failed to copy, aborting")
5560 _RemoveDisks(self, instance, target_node=target_node)
5562 self.cfg.ReleaseDRBDMinors(instance.name)
5563 raise errors.OpExecError("Errors during disk copy: %s" %
5566 instance.primary_node = target_node
5567 self.cfg.Update(instance, feedback_fn)
5569 self.LogInfo("Removing the disks on the original node")
5570 _RemoveDisks(self, instance, target_node=source_node)
5572 # Only start the instance if it's marked as up
5573 if instance.admin_up:
5574 self.LogInfo("Starting instance %s on node %s",
5575 instance.name, target_node)
5577 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5578 ignore_secondaries=True)
5580 _ShutdownInstanceDisks(self, instance)
5581 raise errors.OpExecError("Can't activate the instance's disks")
5583 result = self.rpc.call_instance_start(target_node, instance, None, None)
5584 msg = result.fail_msg
5586 _ShutdownInstanceDisks(self, instance)
5587 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5588 (instance.name, target_node, msg))
5591 class LUMigrateNode(LogicalUnit):
5592 """Migrate all instances from a node.
5595 HPATH = "node-migrate"
5596 HTYPE = constants.HTYPE_NODE
5598 ("node_name", _TNonEmptyString),
5603 def ExpandNames(self):
5604 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5606 self.needed_locks = {
5607 locking.LEVEL_NODE: [self.op.node_name],
5610 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5612 # Create tasklets for migrating instances for all instances on this node
5616 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5617 logging.debug("Migrating instance %s", inst.name)
5618 names.append(inst.name)
5620 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5622 self.tasklets = tasklets
5624 # Declare instance locks
5625 self.needed_locks[locking.LEVEL_INSTANCE] = names
5627 def DeclareLocks(self, level):
5628 if level == locking.LEVEL_NODE:
5629 self._LockInstancesNodes()
5631 def BuildHooksEnv(self):
5634 This runs on the master, the primary and all the secondaries.
5638 "NODE_NAME": self.op.node_name,
5641 nl = [self.cfg.GetMasterNode()]
5643 return (env, nl, nl)
5646 class TLMigrateInstance(Tasklet):
5647 def __init__(self, lu, instance_name, live, cleanup):
5648 """Initializes this class.
5651 Tasklet.__init__(self, lu)
5654 self.instance_name = instance_name
5656 self.cleanup = cleanup
5658 def CheckPrereq(self):
5659 """Check prerequisites.
5661 This checks that the instance is in the cluster.
5664 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5665 instance = self.cfg.GetInstanceInfo(instance_name)
5666 assert instance is not None
5668 if instance.disk_template != constants.DT_DRBD8:
5669 raise errors.OpPrereqError("Instance's disk layout is not"
5670 " drbd8, cannot migrate.", errors.ECODE_STATE)
5672 secondary_nodes = instance.secondary_nodes
5673 if not secondary_nodes:
5674 raise errors.ConfigurationError("No secondary node but using"
5675 " drbd8 disk template")
5677 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5679 target_node = secondary_nodes[0]
5680 # check memory requirements on the secondary node
5681 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5682 instance.name, i_be[constants.BE_MEMORY],
5683 instance.hypervisor)
5685 # check bridge existence
5686 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5688 if not self.cleanup:
5689 _CheckNodeNotDrained(self.lu, target_node)
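# ask the source node whether this instance can be live-migrated at all;
# if not, the error below suggests using failover instead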
5690 result = self.rpc.call_instance_migratable(instance.primary_node,
5692 result.Raise("Can't migrate, please use failover",
5693 prereq=True, ecode=errors.ECODE_STATE)
5695 self.instance = instance
5697 def _WaitUntilSync(self):
5698 """Poll with custom rpc for disk sync.
5700 This uses our own step-based rpc call.
5703 self.feedback_fn("* wait until resync is done")
5707 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5709 self.instance.disks)
5711 for node, nres in result.items():
5712 nres.Raise("Cannot resync disks on node %s" % node)
5713 node_done, node_percent = nres.payload
5714 all_done = all_done and node_done
5715 if node_percent is not None:
5716 min_percent = min(min_percent, node_percent)
5718 if min_percent < 100:
5719 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5722 def _EnsureSecondary(self, node):
5723 """Demote a node to secondary.
5726 self.feedback_fn("* switching node %s to secondary mode" % node)
5728 for dev in self.instance.disks:
5729 self.cfg.SetDiskID(dev, node)
5731 result = self.rpc.call_blockdev_close(node, self.instance.name,
5732 self.instance.disks)
5733 result.Raise("Cannot change disk to secondary on node %s" % node)
5735 def _GoStandalone(self):
5736 """Disconnect from the network.
5739 self.feedback_fn("* changing into standalone mode")
5740 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5741 self.instance.disks)
5742 for node, nres in result.items():
5743 nres.Raise("Cannot disconnect disks node %s" % node)
5745 def _GoReconnect(self, multimaster):
5746 """Reconnect to the network.
5752 msg = "single-master"
5753 self.feedback_fn("* changing disks into %s mode" % msg)
5754 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5755 self.instance.disks,
5756 self.instance.name, multimaster)
5757 for node, nres in result.items():
5758 nres.Raise("Cannot change disks config on node %s" % node)
5760 def _ExecCleanup(self):
5761 """Try to cleanup after a failed migration.
5763 The cleanup is done by:
5764 - check that the instance is running only on one node
5765 (and update the config if needed)
5766 - change disks on its secondary node to secondary
5767 - wait until disks are fully synchronized
5768 - disconnect from the network
5769 - change disks into single-master mode
5770 - wait again until disks are fully synchronized
5773 instance = self.instance
5774 target_node = self.target_node
5775 source_node = self.source_node
5777 # check running on only one node
5778 self.feedback_fn("* checking where the instance actually runs"
5779 " (if this hangs, the hypervisor might be in"
5781 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5782 for node, result in ins_l.items():
5783 result.Raise("Can't contact node %s" % node)
5785 runningon_source = instance.name in ins_l[source_node].payload
5786 runningon_target = instance.name in ins_l[target_node].payload
5788 if runningon_source and runningon_target:
5789 raise errors.OpExecError("Instance seems to be running on two nodes,"
5790 " or the hypervisor is confused. You will have"
5791 " to ensure manually that it runs only on one"
5792 " and restart this operation.")
5794 if not (runningon_source or runningon_target):
5795 raise errors.OpExecError("Instance does not seem to be running at all."
5796 " In this case, it's safer to repair by"
5797 " running 'gnt-instance stop' to ensure disk"
5798 " shutdown, and then restarting it.")
5800 if runningon_target:
5801 # the migration has actually succeeded, we need to update the config
5802 self.feedback_fn("* instance running on secondary node (%s),"
5803 " updating config" % target_node)
5804 instance.primary_node = target_node
5805 self.cfg.Update(instance, self.feedback_fn)
5806 demoted_node = source_node
5808 self.feedback_fn("* instance confirmed to be running on its"
5809 " primary node (%s)" % source_node)
5810 demoted_node = target_node
5812 self._EnsureSecondary(demoted_node)
5814 self._WaitUntilSync()
5815 except errors.OpExecError:
5816 # we ignore errors here, since if the device is standalone, it
5817 # won't be able to sync
5819 self._GoStandalone()
5820 self._GoReconnect(False)
5821 self._WaitUntilSync()
5823 self.feedback_fn("* done")
5825 def _RevertDiskStatus(self):
5826 """Try to revert the disk status after a failed migration.
5829 target_node = self.target_node
5831 self._EnsureSecondary(target_node)
5832 self._GoStandalone()
5833 self._GoReconnect(False)
5834 self._WaitUntilSync()
5835 except errors.OpExecError, err:
5836 self.lu.LogWarning("Migration failed and I can't reconnect the"
5837 " drives: error '%s'\n"
5838 "Please look and recover the instance status" %
5841 def _AbortMigration(self):
5842 """Call the hypervisor code to abort a started migration.
5845 instance = self.instance
5846 target_node = self.target_node
5847 migration_info = self.migration_info
5849 abort_result = self.rpc.call_finalize_migration(target_node,
5853 abort_msg = abort_result.fail_msg
5855 logging.error("Aborting migration failed on target node %s: %s",
5856 target_node, abort_msg)
5857 # Don't raise an exception here, as we still have to try to revert the
5858 # disk status, even if this step failed.
5860 def _ExecMigration(self):
5861 """Migrate an instance.
5863 The migration is done by:
5864 - change the disks into dual-master mode
5865 - wait until disks are fully synchronized again
5866 - migrate the instance
5867 - change disks on the new secondary node (the old primary) to secondary
5868 - wait until disks are fully synchronized
5869 - change disks into single-master mode
5872 instance = self.instance
5873 target_node = self.target_node
5874 source_node = self.source_node
5876 self.feedback_fn("* checking disk consistency between source and target")
5877 for dev in instance.disks:
5878 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5879 raise errors.OpExecError("Disk %s is degraded or not fully"
5880 " synchronized on target node,"
5881 " aborting migrate." % dev.iv_name)
5883 # First get the migration information from the remote node
5884 result = self.rpc.call_migration_info(source_node, instance)
5885 msg = result.fail_msg
5887 log_err = ("Failed fetching source migration information from %s: %s" %
5889 logging.error(log_err)
5890 raise errors.OpExecError(log_err)
5892 self.migration_info = migration_info = result.payload
5894 # Then switch the disks to master/master mode
5895 self._EnsureSecondary(target_node)
5896 self._GoStandalone()
5897 self._GoReconnect(True)
5898 self._WaitUntilSync()
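# The disks are now connected in dual-primary (master/master) mode and in
# sync, which is what the hypervisor needs to perform the live migration.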
5900 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5901 result = self.rpc.call_accept_instance(target_node,
5904 self.nodes_ip[target_node])
5906 msg = result.fail_msg
5908 logging.error("Instance pre-migration failed, trying to revert"
5909 " disk status: %s", msg)
5910 self.feedback_fn("Pre-migration failed, aborting")
5911 self._AbortMigration()
5912 self._RevertDiskStatus()
5913 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5914 (instance.name, msg))
5916 self.feedback_fn("* migrating instance to %s" % target_node)
5918 result = self.rpc.call_instance_migrate(source_node, instance,
5919 self.nodes_ip[target_node],
5921 msg = result.fail_msg
5923 logging.error("Instance migration failed, trying to revert"
5924 " disk status: %s", msg)
5925 self.feedback_fn("Migration failed, aborting")
5926 self._AbortMigration()
5927 self._RevertDiskStatus()
5928 raise errors.OpExecError("Could not migrate instance %s: %s" %
5929 (instance.name, msg))
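# The hypervisor-level migration succeeded; record the new primary node
# before distributing the updated configuration below.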
5932 instance.primary_node = target_node
5933 # distribute new instance config to the other nodes
5934 self.cfg.Update(instance, self.feedback_fn)
5936 result = self.rpc.call_finalize_migration(target_node,
5940 msg = result.fail_msg
5942 logging.error("Instance migration succeeded, but finalization failed: %s", msg)
5944 raise errors.OpExecError("Could not finalize instance migration: %s" % msg)
5947 self._EnsureSecondary(source_node)
5948 self._WaitUntilSync()
5949 self._GoStandalone()
5950 self._GoReconnect(False)
5951 self._WaitUntilSync()
5953 self.feedback_fn("* done")
5955 def Exec(self, feedback_fn):
5956 """Perform the migration.
5959 feedback_fn("Migrating instance %s" % self.instance.name)
5961 self.feedback_fn = feedback_fn
5963 self.source_node = self.instance.primary_node
5964 self.target_node = self.instance.secondary_nodes[0]
5965 self.all_nodes = [self.source_node, self.target_node]
5967 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5968 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5972 return self._ExecCleanup()
5974 return self._ExecMigration()
5977 def _CreateBlockDev(lu, node, instance, device, force_create,
5979 """Create a tree of block devices on a given node.
5981 If this device type has to be created on secondaries, create it and all its children.
5984 If not, just recurse to children keeping the same 'force' value.
5986 @param lu: the lu on whose behalf we execute
5987 @param node: the node on which to create the device
5988 @type instance: L{objects.Instance}
5989 @param instance: the instance which owns the device
5990 @type device: L{objects.Disk}
5991 @param device: the device to create
5992 @type force_create: boolean
5993 @param force_create: whether to force creation of this device; this
5994 will be changed to True whenever we find a device which has the
5995 CreateOnSecondary() attribute
5996 @param info: the extra 'metadata' we should attach to the device
5997 (this will be represented as a LVM tag)
5998 @type force_open: boolean
5999 @param force_open: this parameter will be passed to the
6000 L{backend.BlockdevCreate} function where it specifies
6001 whether we run on primary or not, and it affects both
6002 the child assembly and the device's own Open() execution
6005 if device.CreateOnSecondary():
6009 for child in device.children:
6010 _CreateBlockDev(lu, node, instance, child, force_create,
6013 if not force_create:
6016 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6019 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6020 """Create a single block device on a given node.
6022 This will not recurse over children of the device, so they must be created in advance.
6025 @param lu: the lu on whose behalf we execute
6026 @param node: the node on which to create the device
6027 @type instance: L{objects.Instance}
6028 @param instance: the instance which owns the device
6029 @type device: L{objects.Disk}
6030 @param device: the device to create
6031 @param info: the extra 'metadata' we should attach to the device
6032 (this will be represented as a LVM tag)
6033 @type force_open: boolean
6034 @param force_open: this parameter will be passed to the
6035 L{backend.BlockdevCreate} function where it specifies
6036 whether we run on primary or not, and it affects both
6037 the child assembly and the device's own Open() execution
6040 lu.cfg.SetDiskID(device, node)
6041 result = lu.rpc.call_blockdev_create(node, device, device.size,
6042 instance.name, force_open, info)
6043 result.Raise("Can't create block device %s on"
6044 " node %s for instance %s" % (device, node, instance.name))
6045 if device.physical_id is None:
6046 device.physical_id = result.payload
6049 def _GenerateUniqueNames(lu, exts):
6050 """Generate a suitable LV name.
6052 This will generate a logical volume name for the given instance.
6057 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6058 results.append("%s%s" % (new_id, val))
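# Each generated name is the freshly allocated unique id with the extension
# appended, e.g. "<uuid>.disk0" or "<uuid>.disk0_data" depending on the
# caller (illustrative examples, not literal values).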
6062 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6064 """Generate a drbd8 device complete with its children.
6067 port = lu.cfg.AllocatePort()
6068 vgname = lu.cfg.GetVGName()
6069 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6070 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6071 logical_id=(vgname, names[0]))
6072 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6073 logical_id=(vgname, names[1]))
6074 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6075 logical_id=(primary, secondary, port,
6078 children=[dev_data, dev_meta],
6083 def _GenerateDiskTemplate(lu, template_name,
6084 instance_name, primary_node,
6085 secondary_nodes, disk_info,
6086 file_storage_dir, file_driver,
6088 """Generate the entire disk layout for a given template type.
6091 #TODO: compute space requirements
6093 vgname = lu.cfg.GetVGName()
6094 disk_count = len(disk_info)
6096 if template_name == constants.DT_DISKLESS:
6098 elif template_name == constants.DT_PLAIN:
6099 if len(secondary_nodes) != 0:
6100 raise errors.ProgrammerError("Wrong template configuration")
6102 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6103 for i in range(disk_count)])
6104 for idx, disk in enumerate(disk_info):
6105 disk_index = idx + base_index
6106 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6107 logical_id=(vgname, names[idx]),
6108 iv_name="disk/%d" % disk_index,
6110 disks.append(disk_dev)
6111 elif template_name == constants.DT_DRBD8:
6112 if len(secondary_nodes) != 1:
6113 raise errors.ProgrammerError("Wrong template configuration")
6114 remote_node = secondary_nodes[0]
6115 minors = lu.cfg.AllocateDRBDMinor(
6116 [primary_node, remote_node] * len(disk_info), instance_name)
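# The minors come back interleaved per disk, [pri0, sec0, pri1, sec1, ...],
# matching the [primary_node, remote_node] * len(disk_info) request above;
# this is why the loop below indexes minors[idx*2] and minors[idx*2+1].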
6119 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6120 for i in range(disk_count)]):
6121 names.append(lv_prefix + "_data")
6122 names.append(lv_prefix + "_meta")
6123 for idx, disk in enumerate(disk_info):
6124 disk_index = idx + base_index
6125 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6126 disk["size"], names[idx*2:idx*2+2],
6127 "disk/%d" % disk_index,
6128 minors[idx*2], minors[idx*2+1])
6129 disk_dev.mode = disk["mode"]
6130 disks.append(disk_dev)
6131 elif template_name == constants.DT_FILE:
6132 if len(secondary_nodes) != 0:
6133 raise errors.ProgrammerError("Wrong template configuration")
6135 _RequireFileStorage()
6137 for idx, disk in enumerate(disk_info):
6138 disk_index = idx + base_index
6139 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6140 iv_name="disk/%d" % disk_index,
6141 logical_id=(file_driver,
6142 "%s/disk%d" % (file_storage_dir,
6145 disks.append(disk_dev)
6147 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6151 def _GetInstanceInfoText(instance):
6152 """Compute the text that should be added to the disk's metadata.
6155 return "originstname+%s" % instance.name
6158 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6159 """Create all disks for an instance.
6161 This abstracts away some work from AddInstance.
6163 @type lu: L{LogicalUnit}
6164 @param lu: the logical unit on whose behalf we execute
6165 @type instance: L{objects.Instance}
6166 @param instance: the instance whose disks we should create
6168 @param to_skip: list of indices to skip
6169 @type target_node: string
6170 @param target_node: if passed, overrides the target node for creation
6172 @return: the success of the creation
6175 info = _GetInstanceInfoText(instance)
6176 if target_node is None:
6177 pnode = instance.primary_node
6178 all_nodes = instance.all_nodes
6183 if instance.disk_template == constants.DT_FILE:
6184 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6185 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6187 result.Raise("Failed to create directory '%s' on"
6188 " node %s" % (file_storage_dir, pnode))
6190 # Note: this needs to be kept in sync with adding of disks in
6191 # LUSetInstanceParams
6192 for idx, device in enumerate(instance.disks):
6193 if to_skip and idx in to_skip:
6195 logging.info("Creating volume %s for instance %s",
6196 device.iv_name, instance.name)
6198 for node in all_nodes:
6199 f_create = node == pnode
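# f_create is True only on the primary node; it is passed both as
# force_create and as force_open in the call below.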
6200 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6203 def _RemoveDisks(lu, instance, target_node=None):
6204 """Remove all disks for an instance.
6206 This abstracts away some work from `AddInstance()` and
6207 `RemoveInstance()`. Note that in case some of the devices couldn't
6208 be removed, the removal will continue with the other ones (compare
6209 with `_CreateDisks()`).
6211 @type lu: L{LogicalUnit}
6212 @param lu: the logical unit on whose behalf we execute
6213 @type instance: L{objects.Instance}
6214 @param instance: the instance whose disks we should remove
6215 @type target_node: string
6216 @param target_node: used to override the node on which to remove the disks
6218 @return: the success of the removal
6221 logging.info("Removing block devices for instance %s", instance.name)
6224 for device in instance.disks:
6226 edata = [(target_node, device)]
6228 edata = device.ComputeNodeTree(instance.primary_node)
6229 for node, disk in edata:
6230 lu.cfg.SetDiskID(disk, node)
6231 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6233 lu.LogWarning("Could not remove block device %s on node %s,"
6234 " continuing anyway: %s", device.iv_name, node, msg)
6237 if instance.disk_template == constants.DT_FILE:
6238 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6242 tgt = instance.primary_node
6243 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6245 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6246 file_storage_dir, instance.primary_node, result.fail_msg)
6252 def _ComputeDiskSize(disk_template, disks):
6253 """Compute disk size requirements in the volume group
6256 # Required free disk space as a function of disk and swap space
6258 constants.DT_DISKLESS: None,
6259 constants.DT_PLAIN: sum(d["size"] for d in disks),
6260 # 128 MB are added for drbd metadata for each disk
6261 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6262 constants.DT_FILE: None,
6265 if disk_template not in req_size_dict:
6266 raise errors.ProgrammerError("Disk template '%s' size requirement"
6267 " is unknown" % disk_template)
6269 return req_size_dict[disk_template]
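# Illustrative example (not part of the original code): for two DRBD8 disks
# of 10240 MB and 2048 MB, the required space is
# (10240 + 128) + (2048 + 128) = 12544 MB in the volume group, while
# diskless and file-based templates return None (no VG space needed).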
6272 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6273 """Hypervisor parameter validation.
6275 This function abstracts the hypervisor parameter validation to be
6276 used in both instance create and instance modify.
6278 @type lu: L{LogicalUnit}
6279 @param lu: the logical unit for which we check
6280 @type nodenames: list
6281 @param nodenames: the list of nodes on which we should check
6282 @type hvname: string
6283 @param hvname: the name of the hypervisor we should use
6284 @type hvparams: dict
6285 @param hvparams: the parameters which we need to check
6286 @raise errors.OpPrereqError: if the parameters are not valid
6289 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6292 for node in nodenames:
6296 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6299 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6300 """OS parameters validation.
6302 @type lu: L{LogicalUnit}
6303 @param lu: the logical unit for which we check
6304 @type required: boolean
6305 @param required: whether the validation should fail if the OS is not found
6307 @type nodenames: list
6308 @param nodenames: the list of nodes on which we should check
6309 @type osname: string
6310 @param osname: the name of the OS we should use
6311 @type osparams: dict
6312 @param osparams: the parameters which we need to check
6313 @raise errors.OpPrereqError: if the parameters are not valid
6316 result = lu.rpc.call_os_validate(required, nodenames, osname,
6317 [constants.OS_VALIDATE_PARAMETERS],
6319 for node, nres in result.items():
6320 # we don't check for offline cases since this should be run only
6321 # against the master node and/or an instance's nodes
6322 nres.Raise("OS Parameters validation failed on node %s" % node)
6323 if not nres.payload:
6324 lu.LogInfo("OS %s not found on node %s, validation skipped", osname, node)
6328 class LUCreateInstance(LogicalUnit):
6329 """Create an instance.
6332 HPATH = "instance-add"
6333 HTYPE = constants.HTYPE_INSTANCE
6335 ("instance_name", _TNonEmptyString),
6336 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6338 ("wait_for_sync", _TBool),
6339 ("ip_check", _TBool),
6340 ("disks", _TListOf(_TDict)),
6341 ("nics", _TListOf(_TDict)),
6342 ("hvparams", _TDict),
6343 ("beparams", _TDict),
6344 ("osparams", _TDict),
6347 ("name_check", True),
6348 ("no_install", False),
6350 ("force_variant", False),
6351 ("source_handshake", None),
6352 ("source_x509_ca", None),
6353 ("source_instance_name", None),
6358 ("iallocator", None),
6359 ("hypervisor", None),
6360 ("disk_template", None),
6361 ("identify_defaults", None),
6365 def CheckArguments(self):
6369 # do not require name_check to ease forward/backward compatibility
6371 if self.op.no_install and self.op.start:
6372 self.LogInfo("No-installation mode selected, disabling startup")
6373 self.op.start = False
6374 # validate/normalize the instance name
6375 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6376 if self.op.ip_check and not self.op.name_check:
6377 # TODO: make the ip check more flexible and not depend on the name check
6378 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6381 # check nics' parameter names
6382 for nic in self.op.nics:
6383 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6385 # check disks. parameter names and consistent adopt/no-adopt strategy
6386 has_adopt = has_no_adopt = False
6387 for disk in self.op.disks:
6388 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6393 if has_adopt and has_no_adopt:
6394 raise errors.OpPrereqError("Either all disks are adopted or none is",
6397 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6398 raise errors.OpPrereqError("Disk adoption is not supported for the"
6399 " '%s' disk template" %
6400 self.op.disk_template,
6402 if self.op.iallocator is not None:
6403 raise errors.OpPrereqError("Disk adoption not allowed with an"
6404 " iallocator script", errors.ECODE_INVAL)
6405 if self.op.mode == constants.INSTANCE_IMPORT:
6406 raise errors.OpPrereqError("Disk adoption not allowed for"
6407 " instance import", errors.ECODE_INVAL)
6409 self.adopt_disks = has_adopt
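# From here on the opcode either adopts all of its disks or none of them
# (enforced by the check above).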
6411 # instance name verification
6412 if self.op.name_check:
6413 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6414 self.op.instance_name = self.hostname1.name
6415 # used in CheckPrereq for ip ping check
6416 self.check_ip = self.hostname1.ip
6417 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6418 raise errors.OpPrereqError("Remote imports require names to be checked",
6421 self.check_ip = None
6423 # file storage checks
6424 if (self.op.file_driver and
6425 not self.op.file_driver in constants.FILE_DRIVER):
6426 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6427 self.op.file_driver, errors.ECODE_INVAL)
6429 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6430 raise errors.OpPrereqError("File storage directory path not absolute",
6433 ### Node/iallocator related checks
6434 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6435 raise errors.OpPrereqError("One and only one of iallocator and primary"
6436 " node must be given",
6439 self._cds = _GetClusterDomainSecret()
6441 if self.op.mode == constants.INSTANCE_IMPORT:
6442 # On import force_variant must be True, because if we forced it at
6443 # initial install, our only chance when importing it back is that it still works
6445 self.op.force_variant = True
6447 if self.op.no_install:
6448 self.LogInfo("No-installation mode has no effect during import")
6450 elif self.op.mode == constants.INSTANCE_CREATE:
6451 if self.op.os_type is None:
6452 raise errors.OpPrereqError("No guest OS specified",
6454 if self.op.disk_template is None:
6455 raise errors.OpPrereqError("No disk template specified",
6458 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6459 # Check handshake to ensure both clusters have the same domain secret
6460 src_handshake = self.op.source_handshake
6461 if not src_handshake:
6462 raise errors.OpPrereqError("Missing source handshake",
6465 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6468 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6471 # Load and check source CA
6472 self.source_x509_ca_pem = self.op.source_x509_ca
6473 if not self.source_x509_ca_pem:
6474 raise errors.OpPrereqError("Missing source X509 CA",
6478 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6480 except OpenSSL.crypto.Error, err:
6481 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6482 (err, ), errors.ECODE_INVAL)
6484 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6485 if errcode is not None:
6486 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6489 self.source_x509_ca = cert
6491 src_instance_name = self.op.source_instance_name
6492 if not src_instance_name:
6493 raise errors.OpPrereqError("Missing source instance name",
6496 self.source_instance_name = \
6497 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6500 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6501 self.op.mode, errors.ECODE_INVAL)
6503 def ExpandNames(self):
6504 """ExpandNames for CreateInstance.
6506 Figure out the right locks for instance creation.
6509 self.needed_locks = {}
6511 instance_name = self.op.instance_name
6512 # this is just a preventive check, but someone might still add this
6513 # instance in the meantime, and creation will fail at lock-add time
6514 if instance_name in self.cfg.GetInstanceList():
6515 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6516 instance_name, errors.ECODE_EXISTS)
6518 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6520 if self.op.iallocator:
6521 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6523 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6524 nodelist = [self.op.pnode]
6525 if self.op.snode is not None:
6526 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6527 nodelist.append(self.op.snode)
6528 self.needed_locks[locking.LEVEL_NODE] = nodelist
6530 # in case of import lock the source node too
6531 if self.op.mode == constants.INSTANCE_IMPORT:
6532 src_node = self.op.src_node
6533 src_path = self.op.src_path
6535 if src_path is None:
6536 self.op.src_path = src_path = self.op.instance_name
6538 if src_node is None:
6539 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6540 self.op.src_node = None
6541 if os.path.isabs(src_path):
6542 raise errors.OpPrereqError("Importing an instance from an absolute"
6543 " path requires a source node option.",
6546 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6547 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6548 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6549 if not os.path.isabs(src_path):
6550 self.op.src_path = src_path = \
6551 utils.PathJoin(constants.EXPORT_DIR, src_path)
6553 def _RunAllocator(self):
6554 """Run the allocator based on input opcode.
6557 nics = [n.ToDict() for n in self.nics]
6558 ial = IAllocator(self.cfg, self.rpc,
6559 mode=constants.IALLOCATOR_MODE_ALLOC,
6560 name=self.op.instance_name,
6561 disk_template=self.op.disk_template,
6564 vcpus=self.be_full[constants.BE_VCPUS],
6565 mem_size=self.be_full[constants.BE_MEMORY],
6568 hypervisor=self.op.hypervisor,
6571 ial.Run(self.op.iallocator)
6574 raise errors.OpPrereqError("Can't compute nodes using"
6575 " iallocator '%s': %s" %
6576 (self.op.iallocator, ial.info),
6578 if len(ial.result) != ial.required_nodes:
6579 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6580 " of nodes (%s), required %s" %
6581 (self.op.iallocator, len(ial.result),
6582 ial.required_nodes), errors.ECODE_FAULT)
6583 self.op.pnode = ial.result[0]
6584 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6585 self.op.instance_name, self.op.iallocator,
6586 utils.CommaJoin(ial.result))
6587 if ial.required_nodes == 2:
6588 self.op.snode = ial.result[1]
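# For templates that need two nodes the allocator's second result becomes
# the secondary node of the new instance.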
6590 def BuildHooksEnv(self):
6593 This runs on master, primary and secondary nodes of the instance.
6597 "ADD_MODE": self.op.mode,
6599 if self.op.mode == constants.INSTANCE_IMPORT:
6600 env["SRC_NODE"] = self.op.src_node
6601 env["SRC_PATH"] = self.op.src_path
6602 env["SRC_IMAGES"] = self.src_images
6604 env.update(_BuildInstanceHookEnv(
6605 name=self.op.instance_name,
6606 primary_node=self.op.pnode,
6607 secondary_nodes=self.secondaries,
6608 status=self.op.start,
6609 os_type=self.op.os_type,
6610 memory=self.be_full[constants.BE_MEMORY],
6611 vcpus=self.be_full[constants.BE_VCPUS],
6612 nics=_NICListToTuple(self, self.nics),
6613 disk_template=self.op.disk_template,
6614 disks=[(d["size"], d["mode"]) for d in self.disks],
6617 hypervisor_name=self.op.hypervisor,
6620 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6624 def _ReadExportInfo(self):
6625 """Reads the export information from disk.
6627 It will override the opcode source node and path with the actual
6628 information, if these two were not specified before.
6630 @return: the export information
6633 assert self.op.mode == constants.INSTANCE_IMPORT
6635 src_node = self.op.src_node
6636 src_path = self.op.src_path
6638 if src_node is None:
6639 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6640 exp_list = self.rpc.call_export_list(locked_nodes)
6642 for node in exp_list:
6643 if exp_list[node].fail_msg:
6645 if src_path in exp_list[node].payload:
6647 self.op.src_node = src_node = node
6648 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6652 raise errors.OpPrereqError("No export found for relative path %s" %
6653 src_path, errors.ECODE_INVAL)
6655 _CheckNodeOnline(self, src_node)
6656 result = self.rpc.call_export_info(src_node, src_path)
6657 result.Raise("No export or invalid export found in dir %s" % src_path)
6659 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6660 if not export_info.has_section(constants.INISECT_EXP):
6661 raise errors.ProgrammerError("Corrupted export config",
6662 errors.ECODE_ENVIRON)
6664 ei_version = export_info.get(constants.INISECT_EXP, "version")
6665 if (int(ei_version) != constants.EXPORT_VERSION):
6666 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6667 (ei_version, constants.EXPORT_VERSION),
6668 errors.ECODE_ENVIRON)
6671 def _ReadExportParams(self, einfo):
6672 """Use export parameters as defaults.
6674 In case the opcode doesn't specify (as in override) some instance
6675 parameters, then try to use them from the export information, if that declares them.
6679 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6681 if self.op.disk_template is None:
6682 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6683 self.op.disk_template = einfo.get(constants.INISECT_INS,
6686 raise errors.OpPrereqError("No disk template specified and the export"
6687 " is missing the disk_template information",
6690 if not self.op.disks:
6691 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6693 # TODO: import the disk iv_name too
6694 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6695 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6696 disks.append({"size": disk_sz})
6697 self.op.disks = disks
6699 raise errors.OpPrereqError("No disk info specified and the export"
6700 " is missing the disk information",
6703 if (not self.op.nics and
6704 einfo.has_option(constants.INISECT_INS, "nic_count")):
6706 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6708 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6709 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6714 if (self.op.hypervisor is None and
6715 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6716 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6717 if einfo.has_section(constants.INISECT_HYP):
6718 # use the export parameters but do not override the ones
6719 # specified by the user
6720 for name, value in einfo.items(constants.INISECT_HYP):
6721 if name not in self.op.hvparams:
6722 self.op.hvparams[name] = value
6724 if einfo.has_section(constants.INISECT_BEP):
6725 # use the parameters, without overriding
6726 for name, value in einfo.items(constants.INISECT_BEP):
6727 if name not in self.op.beparams:
6728 self.op.beparams[name] = value
6730 # try to read the parameters old style, from the main section
6731 for name in constants.BES_PARAMETERS:
6732 if (name not in self.op.beparams and
6733 einfo.has_option(constants.INISECT_INS, name)):
6734 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6736 if einfo.has_section(constants.INISECT_OSP):
6737 # use the parameters, without overriding
6738 for name, value in einfo.items(constants.INISECT_OSP):
6739 if name not in self.op.osparams:
6740 self.op.osparams[name] = value
6742 def _RevertToDefaults(self, cluster):
6743 """Revert the instance parameters to the default values.
6747 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6748 for name in self.op.hvparams.keys():
6749 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6750 del self.op.hvparams[name]
6752 be_defs = cluster.SimpleFillBE({})
6753 for name in self.op.beparams.keys():
6754 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6755 del self.op.beparams[name]
6757 nic_defs = cluster.SimpleFillNIC({})
6758 for nic in self.op.nics:
6759 for name in constants.NICS_PARAMETERS:
6760 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6763 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6764 for name in self.op.osparams.keys():
6765 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6766 del self.op.osparams[name]
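# Note: only values that exactly match the cluster defaults are dropped;
# explicitly requested non-default values stay in the opcode.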
6768 def CheckPrereq(self):
6769 """Check prerequisites.
6772 if self.op.mode == constants.INSTANCE_IMPORT:
6773 export_info = self._ReadExportInfo()
6774 self._ReadExportParams(export_info)
6776 _CheckDiskTemplate(self.op.disk_template)
6778 if (not self.cfg.GetVGName() and
6779 self.op.disk_template not in constants.DTS_NOT_LVM):
6780 raise errors.OpPrereqError("Cluster does not support lvm-based"
6781 " instances", errors.ECODE_STATE)
6783 if self.op.hypervisor is None:
6784 self.op.hypervisor = self.cfg.GetHypervisorType()
6786 cluster = self.cfg.GetClusterInfo()
6787 enabled_hvs = cluster.enabled_hypervisors
6788 if self.op.hypervisor not in enabled_hvs:
6789 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6790 " cluster (%s)" % (self.op.hypervisor,
6791 ",".join(enabled_hvs)),
6794 # check hypervisor parameter syntax (locally)
6795 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6796 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6798 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6799 hv_type.CheckParameterSyntax(filled_hvp)
6800 self.hv_full = filled_hvp
6801 # check that we don't specify global parameters on an instance
6802 _CheckGlobalHvParams(self.op.hvparams)
6804 # fill and remember the beparams dict
6805 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6806 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6808 # build os parameters
6809 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6811 # now that hvp/bep are in final format, let's reset to defaults,
6813 if self.op.identify_defaults:
6814 self._RevertToDefaults(cluster)
6818 for idx, nic in enumerate(self.op.nics):
6819 nic_mode_req = nic.get("mode", None)
6820 nic_mode = nic_mode_req
6821 if nic_mode is None:
6822 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6824 # in routed mode, for the first nic, the default ip is 'auto'
6825 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6826 default_ip_mode = constants.VALUE_AUTO
6828 default_ip_mode = constants.VALUE_NONE
6830 # ip validity checks
6831 ip = nic.get("ip", default_ip_mode)
6832 if ip is None or ip.lower() == constants.VALUE_NONE:
6834 elif ip.lower() == constants.VALUE_AUTO:
6835 if not self.op.name_check:
6836 raise errors.OpPrereqError("IP address set to auto but name checks"
6837 " have been skipped. Aborting.",
6839 nic_ip = self.hostname1.ip
6841 if not utils.IsValidIP(ip):
6842 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6843 " like a valid IP" % ip,
6847 # TODO: check the ip address for uniqueness
6848 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6849 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6852 # MAC address verification
6853 mac = nic.get("mac", constants.VALUE_AUTO)
6854 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6855 mac = utils.NormalizeAndValidateMac(mac)
6858 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6859 except errors.ReservationError:
6860 raise errors.OpPrereqError("MAC address %s already in use"
6861 " in cluster" % mac,
6862 errors.ECODE_NOTUNIQUE)
6864 # bridge verification
6865 bridge = nic.get("bridge", None)
6866 link = nic.get("link", None)
6868 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6869 " at the same time", errors.ECODE_INVAL)
6870 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6871 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6878 nicparams[constants.NIC_MODE] = nic_mode_req
6880 nicparams[constants.NIC_LINK] = link
6882 check_params = cluster.SimpleFillNIC(nicparams)
6883 objects.NIC.CheckParameterSyntax(check_params)
6884 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6886 # disk checks/pre-build
6888 for disk in self.op.disks:
6889 mode = disk.get("mode", constants.DISK_RDWR)
6890 if mode not in constants.DISK_ACCESS_SET:
6891 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6892 mode, errors.ECODE_INVAL)
6893 size = disk.get("size", None)
6895 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6898 except (TypeError, ValueError):
6899 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6901 new_disk = {"size": size, "mode": mode}
6903 new_disk["adopt"] = disk["adopt"]
6904 self.disks.append(new_disk)
6906 if self.op.mode == constants.INSTANCE_IMPORT:
6908 # Check that the new instance doesn't have fewer disks than the export
6909 instance_disks = len(self.disks)
6910 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6911 if instance_disks < export_disks:
6912 raise errors.OpPrereqError("Not enough disks to import."
6913 " (instance: %d, export: %d)" %
6914 (instance_disks, export_disks),
6918 for idx in range(export_disks):
6919 option = 'disk%d_dump' % idx
6920 if export_info.has_option(constants.INISECT_INS, option):
6921 # FIXME: are the old os-es, disk sizes, etc. useful?
6922 export_name = export_info.get(constants.INISECT_INS, option)
6923 image = utils.PathJoin(self.op.src_path, export_name)
6924 disk_images.append(image)
6926 disk_images.append(False)
6928 self.src_images = disk_images
6930 old_name = export_info.get(constants.INISECT_INS, 'name')
6932 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6933 except (TypeError, ValueError), err:
6934 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6935 " an integer: %s" % str(err),
6937 if self.op.instance_name == old_name:
6938 for idx, nic in enumerate(self.nics):
6939 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6940 nic_mac_ini = 'nic%d_mac' % idx
6941 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6943 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6945 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6946 if self.op.ip_check:
6947 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6948 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6949 (self.check_ip, self.op.instance_name),
6950 errors.ECODE_NOTUNIQUE)
6952 #### mac address generation
6953 # By generating here the mac address both the allocator and the hooks get
6954 # the real final mac address rather than the 'auto' or 'generate' value.
6955 # There is a race condition between the generation and the instance object
6956 # creation, which means that we know the mac is valid now, but we're not
6957 # sure it will be when we actually add the instance. If things go bad
6958 # adding the instance will abort because of a duplicate mac, and the
6959 # creation job will fail.
6960 for nic in self.nics:
6961 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6962 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6966 if self.op.iallocator is not None:
6967 self._RunAllocator()
6969 #### node related checks
6971 # check primary node
6972 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6973 assert self.pnode is not None, \
6974 "Cannot retrieve locked node %s" % self.op.pnode
6976 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6977 pnode.name, errors.ECODE_STATE)
6979 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6980 pnode.name, errors.ECODE_STATE)
6982 self.secondaries = []
6984 # mirror node verification
6985 if self.op.disk_template in constants.DTS_NET_MIRROR:
6986 if self.op.snode is None:
6987 raise errors.OpPrereqError("The networked disk templates need"
6988 " a mirror node", errors.ECODE_INVAL)
6989 if self.op.snode == pnode.name:
6990 raise errors.OpPrereqError("The secondary node cannot be the"
6991 " primary node.", errors.ECODE_INVAL)
6992 _CheckNodeOnline(self, self.op.snode)
6993 _CheckNodeNotDrained(self, self.op.snode)
6994 self.secondaries.append(self.op.snode)
6996 nodenames = [pnode.name] + self.secondaries
6998 req_size = _ComputeDiskSize(self.op.disk_template,
7001 # Check lv size requirements, if not adopting
7002 if req_size is not None and not self.adopt_disks:
7003 _CheckNodesFreeDisk(self, nodenames, req_size)
7005 if self.adopt_disks: # instead, we must check the adoption data
7006 all_lvs = set([i["adopt"] for i in self.disks])
7007 if len(all_lvs) != len(self.disks):
7008 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7010 for lv_name in all_lvs:
7012 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7013 except errors.ReservationError:
7014 raise errors.OpPrereqError("LV named %s used by another instance" %
7015 lv_name, errors.ECODE_NOTUNIQUE)
7017 node_lvs = self.rpc.call_lv_list([pnode.name],
7018 self.cfg.GetVGName())[pnode.name]
7019 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7020 node_lvs = node_lvs.payload
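# The payload maps each LV name to a tuple of attributes; the code below
# uses element 0 as the size (in MB) and element 2 as the "in use/online"
# flag.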
7021 delta = all_lvs.difference(node_lvs.keys())
7023 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7024 utils.CommaJoin(delta),
7026 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7028 raise errors.OpPrereqError("Online logical volumes found, cannot"
7029 " adopt: %s" % utils.CommaJoin(online_lvs),
7031 # update the size of each disk based on what is found
7032 for dsk in self.disks:
7033 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7035 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7037 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7038 # check OS parameters (remotely)
7039 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7041 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7043 # memory check on primary node
7045 _CheckNodeFreeMemory(self, self.pnode.name,
7046 "creating instance %s" % self.op.instance_name,
7047 self.be_full[constants.BE_MEMORY],
7050 self.dry_run_result = list(nodenames)
7052 def Exec(self, feedback_fn):
7053 """Create and add the instance to the cluster.
7056 instance = self.op.instance_name
7057 pnode_name = self.pnode.name
7059 ht_kind = self.op.hypervisor
7060 if ht_kind in constants.HTS_REQ_PORT:
7061 network_port = self.cfg.AllocatePort()
7065 if constants.ENABLE_FILE_STORAGE:
7066 # this is needed because os.path.join does not accept None arguments
7067 if self.op.file_storage_dir is None:
7068 string_file_storage_dir = ""
7070 string_file_storage_dir = self.op.file_storage_dir
7072 # build the full file storage dir path
7073 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7074 string_file_storage_dir, instance)
7076 file_storage_dir = ""
7078 disks = _GenerateDiskTemplate(self,
7079 self.op.disk_template,
7080 instance, pnode_name,
7084 self.op.file_driver,
7087 iobj = objects.Instance(name=instance, os=self.op.os_type,
7088 primary_node=pnode_name,
7089 nics=self.nics, disks=disks,
7090 disk_template=self.op.disk_template,
7092 network_port=network_port,
7093 beparams=self.op.beparams,
7094 hvparams=self.op.hvparams,
7095 hypervisor=self.op.hypervisor,
7096 osparams=self.op.osparams,
7099 if self.adopt_disks:
7100 # rename LVs to the newly-generated names; we need to construct
7101 # 'fake' LV disks with the old data, plus the new unique_id
7102 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7104 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7105 rename_to.append(t_dsk.logical_id)
7106 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7107 self.cfg.SetDiskID(t_dsk, pnode_name)
7108 result = self.rpc.call_blockdev_rename(pnode_name,
7109 zip(tmp_disks, rename_to))
7110 result.Raise("Failed to rename adopted LVs")
7112 feedback_fn("* creating instance disks...")
7114 _CreateDisks(self, iobj)
7115 except errors.OpExecError:
7116 self.LogWarning("Device creation failed, reverting...")
7118 _RemoveDisks(self, iobj)
7120 self.cfg.ReleaseDRBDMinors(instance)
7123 feedback_fn("adding instance %s to cluster config" % instance)
7125 self.cfg.AddInstance(iobj, self.proc.GetECId())
7127 # Declare that we don't want to remove the instance lock anymore, as we've
7128 # added the instance to the config
7129 del self.remove_locks[locking.LEVEL_INSTANCE]
7130 # Unlock all the nodes
7131 if self.op.mode == constants.INSTANCE_IMPORT:
7132 nodes_keep = [self.op.src_node]
7133 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7134 if node != self.op.src_node]
7135 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7136 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7138 self.context.glm.release(locking.LEVEL_NODE)
7139 del self.acquired_locks[locking.LEVEL_NODE]
7141 if self.op.wait_for_sync:
7142 disk_abort = not _WaitForSync(self, iobj)
7143 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7144 # make sure the disks are not degraded (still sync-ing is ok)
7146 feedback_fn("* checking mirrors status")
7147 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7152 _RemoveDisks(self, iobj)
7153 self.cfg.RemoveInstance(iobj.name)
7154 # Make sure the instance lock gets removed
7155 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7156 raise errors.OpExecError("There are some degraded disks for this instance")
7159 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7160 if self.op.mode == constants.INSTANCE_CREATE:
7161 if not self.op.no_install:
7162 feedback_fn("* running the instance OS create scripts...")
7163 # FIXME: pass debug option from opcode to backend
7164 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7165 self.op.debug_level)
7166 result.Raise("Could not add os for instance %s"
7167 " on node %s" % (instance, pnode_name))
7169 elif self.op.mode == constants.INSTANCE_IMPORT:
7170 feedback_fn("* running the instance OS import scripts...")
7174 for idx, image in enumerate(self.src_images):
7178 # FIXME: pass debug option from opcode to backend
7179 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7180 constants.IEIO_FILE, (image, ),
7181 constants.IEIO_SCRIPT,
7182 (iobj.disks[idx], idx),
7184 transfers.append(dt)
7187 masterd.instance.TransferInstanceData(self, feedback_fn,
7188 self.op.src_node, pnode_name,
7189 self.pnode.secondary_ip,
7191 if not compat.all(import_result):
7192 self.LogWarning("Some disks for instance %s on node %s were not"
7193 " imported successfully" % (instance, pnode_name))
7195 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7196 feedback_fn("* preparing remote import...")
7197 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7198 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7200 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7201 self.source_x509_ca,
7202 self._cds, timeouts)
7203 if not compat.all(disk_results):
7204 # TODO: Should the instance still be started, even if some disks
7205 # failed to import (valid for local imports, too)?
7206 self.LogWarning("Some disks for instance %s on node %s were not"
7207 " imported successfully" % (instance, pnode_name))
7209 # Run rename script on newly imported instance
7210 assert iobj.name == instance
7211 feedback_fn("Running rename script for %s" % instance)
7212 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7213 self.source_instance_name,
7214 self.op.debug_level)
7216 self.LogWarning("Failed to run rename script for %s on node"
7217 " %s: %s" % (instance, pnode_name, result.fail_msg))
7220 # also checked in the prereq part
7221 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7225 iobj.admin_up = True
7226 self.cfg.Update(iobj, feedback_fn)
7227 logging.info("Starting instance %s on node %s", instance, pnode_name)
7228 feedback_fn("* starting instance...")
7229 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7230 result.Raise("Could not start instance")
7232 return list(iobj.all_nodes)
7235 class LUConnectConsole(NoHooksLU):
7236 """Connect to an instance's console.
7238 This is somewhat special in that it returns the command line that
7239 you need to run on the master node in order to connect to the console.
7243 _OP_REQP = [("instance_name", _TNonEmptyString)]
7246 def ExpandNames(self):
7247 self._ExpandAndLockInstance()
7249 def CheckPrereq(self):
7250 """Check prerequisites.
7252 This checks that the instance is in the cluster.
7255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256 assert self.instance is not None, \
7257 "Cannot retrieve locked instance %s" % self.op.instance_name
7258 _CheckNodeOnline(self, self.instance.primary_node)
7260 def Exec(self, feedback_fn):
7261 """Connect to the console of an instance
7264 instance = self.instance
7265 node = instance.primary_node
7267 node_insts = self.rpc.call_instance_list([node],
7268 [instance.hypervisor])[node]
7269 node_insts.Raise("Can't get node information from %s" % node)
7271 if instance.name not in node_insts.payload:
7272 raise errors.OpExecError("Instance %s is not running." % instance.name)
7274 logging.debug("Connecting to console of %s on %s", instance.name, node)
7276 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7277 cluster = self.cfg.GetClusterInfo()
7278 # beparams and hvparams are passed separately, to avoid editing the
7279 # instance and then saving the defaults in the instance itself.
7280 hvparams = cluster.FillHV(instance)
7281 beparams = cluster.FillBE(instance)
7282 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7285 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7288 class LUReplaceDisks(LogicalUnit):
7289 """Replace the disks of an instance.
7292 HPATH = "mirrors-replace"
7293 HTYPE = constants.HTYPE_INSTANCE
7295 ("instance_name", _TNonEmptyString),
7296 ("mode", _TElemOf(constants.REPLACE_MODES)),
7297 ("disks", _TListOf(_TPositiveInt)),
7300 ("remote_node", None),
7301 ("iallocator", None),
7302 ("early_release", None),
7306 def CheckArguments(self):
7307 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7310 def ExpandNames(self):
7311 self._ExpandAndLockInstance()
7313 if self.op.iallocator is not None:
7314 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7316 elif self.op.remote_node is not None:
7317 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7318 self.op.remote_node = remote_node
7320 # Warning: do not remove the locking of the new secondary here
7321 # unless DRBD8.AddChildren is changed to work in parallel;
7322 # currently it doesn't since parallel invocations of
7323 # FindUnusedMinor will conflict
7324 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7325 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7328 self.needed_locks[locking.LEVEL_NODE] = []
7329 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7331 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7332 self.op.iallocator, self.op.remote_node,
7333 self.op.disks, False, self.op.early_release)
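# The False argument is delay_iallocator (see TLReplaceDisks.__init__
# below): for this LU the allocator runs during CheckPrereq instead of
# being delayed until Exec.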
7335 self.tasklets = [self.replacer]
7337 def DeclareLocks(self, level):
7338 # If we're not already locking all nodes in the set we have to declare the
7339 # instance's primary/secondary nodes.
7340 if (level == locking.LEVEL_NODE and
7341 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7342 self._LockInstancesNodes()
7344 def BuildHooksEnv(self):
7347 This runs on the master, the primary and all the secondaries.
7350 instance = self.replacer.instance
7352 "MODE": self.op.mode,
7353 "NEW_SECONDARY": self.op.remote_node,
7354 "OLD_SECONDARY": instance.secondary_nodes[0],
7356 env.update(_BuildInstanceHookEnvByObject(self, instance))
7358 self.cfg.GetMasterNode(),
7359 instance.primary_node,
7361 if self.op.remote_node is not None:
7362 nl.append(self.op.remote_node)
7366 class TLReplaceDisks(Tasklet):
7367 """Replaces disks for an instance.
7369 Note: Locking is not within the scope of this class.
7372 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7373 disks, delay_iallocator, early_release):
7374 """Initializes this class.
7377 Tasklet.__init__(self, lu)
7380 self.instance_name = instance_name
7382 self.iallocator_name = iallocator_name
7383 self.remote_node = remote_node
7385 self.delay_iallocator = delay_iallocator
7386 self.early_release = early_release
7389 self.instance = None
7390 self.new_node = None
7391 self.target_node = None
7392 self.other_node = None
7393 self.remote_node_info = None
7394 self.node_secondary_ip = None
7397 def CheckArguments(mode, remote_node, iallocator):
7398 """Helper function for users of this class.
7401 # check for valid parameter combination
7402 if mode == constants.REPLACE_DISK_CHG:
7403 if remote_node is None and iallocator is None:
7404 raise errors.OpPrereqError("When changing the secondary either an"
7405 " iallocator script must be used or the"
7406 " new node given", errors.ECODE_INVAL)
7408 if remote_node is not None and iallocator is not None:
7409 raise errors.OpPrereqError("Give either the iallocator or the new"
7410 " secondary, not both", errors.ECODE_INVAL)
7412 elif remote_node is not None or iallocator is not None:
7413 # Not replacing the secondary
7414 raise errors.OpPrereqError("The iallocator and new node options can"
7415 " only be used when changing the"
7416 " secondary node", errors.ECODE_INVAL)
7419 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7420 """Compute a new secondary node using an IAllocator.
7423 ial = IAllocator(lu.cfg, lu.rpc,
7424 mode=constants.IALLOCATOR_MODE_RELOC,
7426 relocate_from=relocate_from)
7428 ial.Run(iallocator_name)
7431 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7432 " %s" % (iallocator_name, ial.info),
7435 if len(ial.result) != ial.required_nodes:
7436 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7437 " of nodes (%s), required %s" %
7439 len(ial.result), ial.required_nodes),
7442 remote_node_name = ial.result[0]
7444 lu.LogInfo("Selected new secondary for instance '%s': %s",
7445 instance_name, remote_node_name)
7447 return remote_node_name
7449 def _FindFaultyDisks(self, node_name):
7450 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7453 def CheckPrereq(self):
7454 """Check prerequisites.
7456 This checks that the instance is in the cluster.
7459 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7460 assert instance is not None, \
7461 "Cannot retrieve locked instance %s" % self.instance_name
7463 if instance.disk_template != constants.DT_DRBD8:
7464 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7465 " instances", errors.ECODE_INVAL)
7467 if len(instance.secondary_nodes) != 1:
7468 raise errors.OpPrereqError("The instance has a strange layout,"
7469 " expected one secondary but found %d" %
7470 len(instance.secondary_nodes),
7473 if not self.delay_iallocator:
7474 self._CheckPrereq2()
7476 def _CheckPrereq2(self):
7477 """Check prerequisites, second part.
7479 This function should always be part of CheckPrereq. It was separated and is
7480 now called from Exec because during node evacuation iallocator was only
7481 called with an unmodified cluster model, not taking planned changes into account.
7485 instance = self.instance
7486 secondary_node = instance.secondary_nodes[0]
7488 if self.iallocator_name is None:
7489 remote_node = self.remote_node
7491 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7492 instance.name, instance.secondary_nodes)
7494 if remote_node is not None:
7495 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7496 assert self.remote_node_info is not None, \
7497 "Cannot retrieve locked node %s" % remote_node
7499 self.remote_node_info = None
7501 if remote_node == self.instance.primary_node:
7502 raise errors.OpPrereqError("The specified node is the primary node of"
7503 " the instance.", errors.ECODE_INVAL)
7505 if remote_node == secondary_node:
7506 raise errors.OpPrereqError("The specified node is already the"
7507 " secondary node of the instance.",
7510 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7511 constants.REPLACE_DISK_CHG):
7512 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7515 if self.mode == constants.REPLACE_DISK_AUTO:
7516 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7517 faulty_secondary = self._FindFaultyDisks(secondary_node)
7519 if faulty_primary and faulty_secondary:
7520 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7521 " one node and cannot be repaired"
7522 " automatically" % self.instance_name,
7526 self.disks = faulty_primary
7527 self.target_node = instance.primary_node
7528 self.other_node = secondary_node
7529 check_nodes = [self.target_node, self.other_node]
7530 elif faulty_secondary:
7531 self.disks = faulty_secondary
7532 self.target_node = secondary_node
7533 self.other_node = instance.primary_node
7534 check_nodes = [self.target_node, self.other_node]
7540 # Non-automatic modes
7541 if self.mode == constants.REPLACE_DISK_PRI:
7542 self.target_node = instance.primary_node
7543 self.other_node = secondary_node
7544 check_nodes = [self.target_node, self.other_node]
7546 elif self.mode == constants.REPLACE_DISK_SEC:
7547 self.target_node = secondary_node
7548 self.other_node = instance.primary_node
7549 check_nodes = [self.target_node, self.other_node]
7551 elif self.mode == constants.REPLACE_DISK_CHG:
7552 self.new_node = remote_node
7553 self.other_node = instance.primary_node
7554 self.target_node = secondary_node
7555 check_nodes = [self.new_node, self.other_node]
7557 _CheckNodeNotDrained(self.lu, remote_node)
7559 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7560 assert old_node_info is not None
7561 if old_node_info.offline and not self.early_release:
7562 # doesn't make sense to delay the release
7563 self.early_release = True
7564 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7565 " early-release mode", secondary_node)
7568 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7571 # If not specified all disks should be replaced
7573 self.disks = range(len(self.instance.disks))
7575 for node in check_nodes:
7576 _CheckNodeOnline(self.lu, node)
7578 # Check whether disks are valid
7579 for disk_idx in self.disks:
7580 instance.FindDisk(disk_idx)
7582 # Get secondary node IP addresses
7585 for node_name in [self.target_node, self.other_node, self.new_node]:
7586 if node_name is not None:
7587 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7589 self.node_secondary_ip = node_2nd_ip
7591 def Exec(self, feedback_fn):
7592 """Execute disk replacement.
7594 This dispatches the disk replacement to the appropriate handler.
7597 if self.delay_iallocator:
7598 self._CheckPrereq2()
7601 feedback_fn("No disks need replacement")
7604 feedback_fn("Replacing disk(s) %s for %s" %
7605 (utils.CommaJoin(self.disks), self.instance.name))
7607 activate_disks = (not self.instance.admin_up)
7609 # Activate the instance disks if we're replacing them on a down instance
7611 _StartInstanceDisks(self.lu, self.instance, True)
7614 # Should we replace the secondary node?
7615 if self.new_node is not None:
7616 fn = self._ExecDrbd8Secondary
7618 fn = self._ExecDrbd8DiskOnly
7620 return fn(feedback_fn)
7623 # Deactivate the instance disks if we're replacing them on a down instance
7626 _SafeShutdownInstanceDisks(self.lu, self.instance)
7628 def _CheckVolumeGroup(self, nodes):
7629 self.lu.LogInfo("Checking volume groups")
7631 vgname = self.cfg.GetVGName()
7633 # Make sure volume group exists on all involved nodes
7634 results = self.rpc.call_vg_list(nodes)
7636 raise errors.OpExecError("Can't list volume groups on the nodes")
7640 res.Raise("Error checking node %s" % node)
7641 if vgname not in res.payload:
7642 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7645 def _CheckDisksExistence(self, nodes):
7646 # Check disk existence
7647 for idx, dev in enumerate(self.instance.disks):
7648 if idx not in self.disks:
7652 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7653 self.cfg.SetDiskID(dev, node)
7655 result = self.rpc.call_blockdev_find(node, dev)
7657 msg = result.fail_msg
7658 if msg or not result.payload:
7660 msg = "disk not found"
7661 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7664 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7665 for idx, dev in enumerate(self.instance.disks):
7666 if idx not in self.disks:
7669 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7672 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7674 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7675 " replace disks for instance %s" %
7676 (node_name, self.instance.name))
7678 def _CreateNewStorage(self, node_name):
7679 vgname = self.cfg.GetVGName()
7682 for idx, dev in enumerate(self.instance.disks):
7683 if idx not in self.disks:
7686 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7688 self.cfg.SetDiskID(dev, node_name)
7690 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7691 names = _GenerateUniqueNames(self.lu, lv_names)
7693 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7694 logical_id=(vgname, names[0]))
7695 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7696 logical_id=(vgname, names[1]))
7698 new_lvs = [lv_data, lv_meta]
7699 old_lvs = dev.children
7700 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
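      # iv_names maps the DRBD device's iv_name to a tuple of
      # (drbd device, current LVs, newly created LVs); the Exec helpers use
      # it to swap the children, verify the devices and remove old storage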
7702 # we pass force_create=True to force the LVM creation
7703 for new_lv in new_lvs:
7704 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7705 _GetInstanceInfoText(self.instance), False)
7709 def _CheckDevices(self, node_name, iv_names):
7710 for name, (dev, _, _) in iv_names.iteritems():
7711 self.cfg.SetDiskID(dev, node_name)
7713 result = self.rpc.call_blockdev_find(node_name, dev)
7715 msg = result.fail_msg
7716 if msg or not result.payload:
7718 msg = "disk not found"
7719 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7722 if result.payload.is_degraded:
7723 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7725 def _RemoveOldStorage(self, node_name, iv_names):
7726 for name, (_, old_lvs, _) in iv_names.iteritems():
7727 self.lu.LogInfo("Remove logical volumes for %s" % name)
7730 self.cfg.SetDiskID(lv, node_name)
7732 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7734 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7735 hint="remove unused LVs manually")
7737 def _ReleaseNodeLock(self, node_name):
7738 """Releases the lock for a given node."""
7739 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7741 def _ExecDrbd8DiskOnly(self, feedback_fn):
7742 """Replace a disk on the primary or secondary for DRBD 8.
7744 The algorithm for replace is quite complicated:
7746 1. for each disk to be replaced:
7748 1. create new LVs on the target node with unique names
7749 1. detach old LVs from the drbd device
7750 1. rename old LVs to name_replaced.<time_t>
7751 1. rename new LVs to old LVs
7752 1. attach the new LVs (with the old names now) to the drbd device
7754 1. wait for sync across all devices
7756 1. for each modified disk:
7758       1. remove old LVs (which have the name name_replaced.<time_t>)

7760 Failures are not very well handled.
7765 # Step: check device activation
7766 self.lu.LogStep(1, steps_total, "Check device existence")
7767 self._CheckDisksExistence([self.other_node, self.target_node])
7768 self._CheckVolumeGroup([self.target_node, self.other_node])
7770 # Step: check other node consistency
7771 self.lu.LogStep(2, steps_total, "Check peer consistency")
7772 self._CheckDisksConsistency(self.other_node,
7773 self.other_node == self.instance.primary_node,
7776 # Step: create new storage
7777 self.lu.LogStep(3, steps_total, "Allocate new storage")
7778 iv_names = self._CreateNewStorage(self.target_node)
7780 # Step: for each lv, detach+rename*2+attach
7781 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7782 for dev, old_lvs, new_lvs in iv_names.itervalues():
7783 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7785 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7787 result.Raise("Can't detach drbd from local storage on node"
7788 " %s for device %s" % (self.target_node, dev.iv_name))
7790 #cfg.Update(instance)
7792 # ok, we created the new LVs, so now we know we have the needed
7793 # storage; as such, we proceed on the target node to rename
7794 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7795 # using the assumption that logical_id == physical_id (which in
7796 # turn is the unique_id on that node)
7798 # FIXME(iustin): use a better name for the replaced LVs
7799 temp_suffix = int(time.time())
7800 ren_fn = lambda d, suff: (d.physical_id[0],
7801 d.physical_id[1] + "_replaced-%s" % suff)
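      # ren_fn keeps the volume group (physical_id[0]) and appends
      # "_replaced-<suffix>" to the LV name (physical_id[1]), i.e.
      # (vg, lv) -> (vg, lv + "_replaced-<time_t>")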
7803 # Build the rename list based on what LVs exist on the node
7804 rename_old_to_new = []
7805 for to_ren in old_lvs:
7806 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7807 if not result.fail_msg and result.payload:
7809 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7811 self.lu.LogInfo("Renaming the old LVs on the target node")
7812 result = self.rpc.call_blockdev_rename(self.target_node,
7814 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7816 # Now we rename the new LVs to the old LVs
7817 self.lu.LogInfo("Renaming the new LVs on the target node")
7818 rename_new_to_old = [(new, old.physical_id)
7819 for old, new in zip(old_lvs, new_lvs)]
7820 result = self.rpc.call_blockdev_rename(self.target_node,
7822 result.Raise("Can't rename new LVs on node %s" % self.target_node)
7824 for old, new in zip(old_lvs, new_lvs):
7825 new.logical_id = old.logical_id
7826 self.cfg.SetDiskID(new, self.target_node)
7828 for disk in old_lvs:
7829 disk.logical_id = ren_fn(disk, temp_suffix)
7830 self.cfg.SetDiskID(disk, self.target_node)
7832 # Now that the new lvs have the old name, we can add them to the device
7833 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7834 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7836 msg = result.fail_msg
7838 for new_lv in new_lvs:
7839 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7842 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7843 hint=("cleanup manually the unused logical"
7845 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7847 dev.children = new_lvs
7849 self.cfg.Update(self.instance, feedback_fn)
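      # persist the new disk children in the cluster configuration before
      # any node locks are (potentially) released below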
7852 if self.early_release:
7853 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7855 self._RemoveOldStorage(self.target_node, iv_names)
7856 # WARNING: we release both node locks here, do not do other RPCs
7857 # than WaitForSync to the primary node
7858 self._ReleaseNodeLock([self.target_node, self.other_node])
7861 # This can fail as the old devices are degraded and _WaitForSync
7862 # does a combined result over all disks, so we don't check its return value
7863 self.lu.LogStep(cstep, steps_total, "Sync devices")
7865 _WaitForSync(self.lu, self.instance)
7867 # Check all devices manually
7868 self._CheckDevices(self.instance.primary_node, iv_names)
7870 # Step: remove old storage
7871 if not self.early_release:
7872 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7874 self._RemoveOldStorage(self.target_node, iv_names)
7876 def _ExecDrbd8Secondary(self, feedback_fn):
7877 """Replace the secondary node for DRBD 8.
7879 The algorithm for replace is quite complicated:
7880 - for all disks of the instance:
7881 - create new LVs on the new node with same names
7882 - shutdown the drbd device on the old secondary
7883 - disconnect the drbd network on the primary
7884 - create the drbd device on the new secondary
7885 - network attach the drbd on the primary, using an artifice:
7886 the drbd code for Attach() will connect to the network if it
7887 finds a device which is connected to the good local disks but
7889 - wait for sync across all devices
7890 - remove all disks from the old secondary
7892 Failures are not very well handled.
7897 # Step: check device activation
7898 self.lu.LogStep(1, steps_total, "Check device existence")
7899 self._CheckDisksExistence([self.instance.primary_node])
7900 self._CheckVolumeGroup([self.instance.primary_node])
7902 # Step: check other node consistency
7903 self.lu.LogStep(2, steps_total, "Check peer consistency")
7904 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7906 # Step: create new storage
7907 self.lu.LogStep(3, steps_total, "Allocate new storage")
7908 for idx, dev in enumerate(self.instance.disks):
7909 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7910 (self.new_node, idx))
7911 # we pass force_create=True to force LVM creation
7912 for new_lv in dev.children:
7913 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7914 _GetInstanceInfoText(self.instance), False)
7916     # Step 4: drbd minors and drbd setup changes
7917 # after this, we must manually remove the drbd minors on both the
7918 # error and the success paths
7919 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7920 minors = self.cfg.AllocateDRBDMinor([self.new_node
7921 for dev in self.instance.disks],
7923 logging.debug("Allocated minors %r", minors)
7926 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7927 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7928 (self.new_node, idx))
7929 # create new devices on new_node; note that we create two IDs:
7930 # one without port, so the drbd will be activated without
7931 # networking information on the new node at this stage, and one
7932 # with network, for the latter activation in step 4
7933 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7934 if self.instance.primary_node == o_node1:
7937 assert self.instance.primary_node == o_node2, "Three-node instance?"
7940 new_alone_id = (self.instance.primary_node, self.new_node, None,
7941 p_minor, new_minor, o_secret)
7942 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7943 p_minor, new_minor, o_secret)
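      # a DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret);
      # new_alone_id deliberately omits the port so the device is first
      # brought up without networking, while new_net_id is the fully
      # networked variant that ends up in the configuration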
7945 iv_names[idx] = (dev, dev.children, new_net_id)
7946 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7948 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7949 logical_id=new_alone_id,
7950 children=dev.children,
7953 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7954 _GetInstanceInfoText(self.instance), False)
7955 except errors.GenericError:
7956 self.cfg.ReleaseDRBDMinors(self.instance.name)
7959 # We have new devices, shutdown the drbd on the old secondary
7960 for idx, dev in enumerate(self.instance.disks):
7961 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7962 self.cfg.SetDiskID(dev, self.target_node)
7963 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7965 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7966 "node: %s" % (idx, msg),
7967 hint=("Please cleanup this device manually as"
7968 " soon as possible"))
7970 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7971 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7972 self.node_secondary_ip,
7973 self.instance.disks)\
7974 [self.instance.primary_node]
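    # the RPC result is a dict keyed by node name; only the primary node was
    # queried, so its entry is extracted directly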
7976 msg = result.fail_msg
7978 # detaches didn't succeed (unlikely)
7979 self.cfg.ReleaseDRBDMinors(self.instance.name)
7980 raise errors.OpExecError("Can't detach the disks from the network on"
7981 " old node: %s" % (msg,))
7983 # if we managed to detach at least one, we update all the disks of
7984 # the instance to point to the new secondary
7985 self.lu.LogInfo("Updating instance configuration")
7986 for dev, _, new_logical_id in iv_names.itervalues():
7987 dev.logical_id = new_logical_id
7988 self.cfg.SetDiskID(dev, self.instance.primary_node)
7990 self.cfg.Update(self.instance, feedback_fn)
7992 # and now perform the drbd attach
7993 self.lu.LogInfo("Attaching primary drbds to new secondary"
7994 " (standalone => connected)")
7995 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7997 self.node_secondary_ip,
7998 self.instance.disks,
8001 for to_node, to_result in result.items():
8002 msg = to_result.fail_msg
8004 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8006 hint=("please do a gnt-instance info to see the"
8007 " status of disks"))
8009 if self.early_release:
8010 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8012 self._RemoveOldStorage(self.target_node, iv_names)
8013 # WARNING: we release all node locks here, do not do other RPCs
8014 # than WaitForSync to the primary node
8015 self._ReleaseNodeLock([self.instance.primary_node,
8020 # This can fail as the old devices are degraded and _WaitForSync
8021 # does a combined result over all disks, so we don't check its return value
8022 self.lu.LogStep(cstep, steps_total, "Sync devices")
8024 _WaitForSync(self.lu, self.instance)
8026 # Check all devices manually
8027 self._CheckDevices(self.instance.primary_node, iv_names)
8029 # Step: remove old storage
8030 if not self.early_release:
8031 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8032 self._RemoveOldStorage(self.target_node, iv_names)
8035 class LURepairNodeStorage(NoHooksLU):
8036 """Repairs the volume group on a node.
8039 _OP_REQP = [("node_name", _TNonEmptyString)]
8042 def CheckArguments(self):
8043 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8045 _CheckStorageType(self.op.storage_type)
8047 storage_type = self.op.storage_type
8049 if (constants.SO_FIX_CONSISTENCY not in
8050 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8051 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8052 " repaired" % storage_type,
8055 def ExpandNames(self):
8056 self.needed_locks = {
8057 locking.LEVEL_NODE: [self.op.node_name],
8060 def _CheckFaultyDisks(self, instance, node_name):
8061 """Ensure faulty disks abort the opcode or at least warn."""
8063 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8065 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8066 " node '%s'" % (instance.name, node_name),
8068 except errors.OpPrereqError, err:
8069 if self.op.ignore_consistency:
8070 self.proc.LogWarning(str(err.args[0]))
8074 def CheckPrereq(self):
8075 """Check prerequisites.
8078 # Check whether any instance on this node has faulty disks
8079 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8080 if not inst.admin_up:
8082 check_nodes = set(inst.all_nodes)
8083 check_nodes.discard(self.op.node_name)
8084 for inst_node_name in check_nodes:
8085 self._CheckFaultyDisks(inst, inst_node_name)
8087 def Exec(self, feedback_fn):
8088 feedback_fn("Repairing storage unit '%s' on %s ..." %
8089 (self.op.name, self.op.node_name))
8091 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8092 result = self.rpc.call_storage_execute(self.op.node_name,
8093 self.op.storage_type, st_args,
8095 constants.SO_FIX_CONSISTENCY)
8096 result.Raise("Failed to repair storage unit '%s' on %s" %
8097 (self.op.name, self.op.node_name))
8100 class LUNodeEvacuationStrategy(NoHooksLU):
8101 """Computes the node evacuation strategy.
8104 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
8106 ("remote_node", None),
8107 ("iallocator", None),
8111 def CheckArguments(self):
8112 if self.op.remote_node is not None and self.op.iallocator is not None:
8113 raise errors.OpPrereqError("Give either the iallocator or the new"
8114 " secondary, not both", errors.ECODE_INVAL)
8116 def ExpandNames(self):
8117 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8118 self.needed_locks = locks = {}
8119 if self.op.remote_node is None:
8120 locks[locking.LEVEL_NODE] = locking.ALL_SET
8122 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8123 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8125 def Exec(self, feedback_fn):
8126 if self.op.remote_node is not None:
8128 for node in self.op.nodes:
8129 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8132 if i.primary_node == self.op.remote_node:
8133 raise errors.OpPrereqError("Node %s is the primary node of"
8134 " instance %s, cannot use it as"
8136 (self.op.remote_node, i.name),
8138 result.append([i.name, self.op.remote_node])
8140 ial = IAllocator(self.cfg, self.rpc,
8141 mode=constants.IALLOCATOR_MODE_MEVAC,
8142 evac_nodes=self.op.nodes)
8143 ial.Run(self.op.iallocator, validate=True)
8145 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8151 class LUGrowDisk(LogicalUnit):
8152 """Grow a disk of an instance.
8156 HTYPE = constants.HTYPE_INSTANCE
8158 ("instance_name", _TNonEmptyString),
8161 ("wait_for_sync", _TBool),
8165 def ExpandNames(self):
8166 self._ExpandAndLockInstance()
8167 self.needed_locks[locking.LEVEL_NODE] = []
8168 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8170 def DeclareLocks(self, level):
8171 if level == locking.LEVEL_NODE:
8172 self._LockInstancesNodes()
8174 def BuildHooksEnv(self):
8177 This runs on the master, the primary and all the secondaries.
8181 "DISK": self.op.disk,
8182 "AMOUNT": self.op.amount,
8184 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8185 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8188 def CheckPrereq(self):
8189 """Check prerequisites.
8191 This checks that the instance is in the cluster.
8194 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8195 assert instance is not None, \
8196 "Cannot retrieve locked instance %s" % self.op.instance_name
8197 nodenames = list(instance.all_nodes)
8198 for node in nodenames:
8199 _CheckNodeOnline(self, node)
8201 self.instance = instance
8203 if instance.disk_template not in constants.DTS_GROWABLE:
8204 raise errors.OpPrereqError("Instance's disk layout does not support"
8205 " growing.", errors.ECODE_INVAL)
8207 self.disk = instance.FindDisk(self.op.disk)
8209 if instance.disk_template != constants.DT_FILE:
8210 # TODO: check the free disk space for file, when that feature will be
8212 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
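      # every node of the instance must have at least self.op.amount of free
      # space for the grow to succeed (the check is skipped for file-based
      # storage above)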
8214 def Exec(self, feedback_fn):
8215 """Execute disk grow.
8218 instance = self.instance
8221 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8223 raise errors.OpExecError("Cannot activate block device to grow")
8225 for node in instance.all_nodes:
8226 self.cfg.SetDiskID(disk, node)
8227 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8228 result.Raise("Grow request failed to node %s" % node)
8230 # TODO: Rewrite code to work properly
8231 # DRBD goes into sync mode for a short amount of time after executing the
8232 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8233 # calling "resize" in sync mode fails. Sleeping for a short amount of
8234 # time is a work-around.
8237 disk.RecordGrow(self.op.amount)
8238 self.cfg.Update(instance, feedback_fn)
8239 if self.op.wait_for_sync:
8240 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8242 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8243 " status.\nPlease check the instance.")
8244 if not instance.admin_up:
8245 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8246 elif not instance.admin_up:
8247       self.proc.LogWarning("Not shutting down the disk even though the"
8248                            " instance is not supposed to be running, because"
8249                            " wait-for-sync mode was not requested.")
8252 class LUQueryInstanceData(NoHooksLU):
8253 """Query runtime instance data.
8257 ("instances", _TListOf(_TNonEmptyString)),
8262 def ExpandNames(self):
8263 self.needed_locks = {}
8264 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8266 if self.op.instances:
8267 self.wanted_names = []
8268 for name in self.op.instances:
8269 full_name = _ExpandInstanceName(self.cfg, name)
8270 self.wanted_names.append(full_name)
8271 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8273 self.wanted_names = None
8274 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8276 self.needed_locks[locking.LEVEL_NODE] = []
8277 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8279 def DeclareLocks(self, level):
8280 if level == locking.LEVEL_NODE:
8281 self._LockInstancesNodes()
8283 def CheckPrereq(self):
8284 """Check prerequisites.
8286 This only checks the optional instance list against the existing names.
8289 if self.wanted_names is None:
8290 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8292 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8293 in self.wanted_names]
8295 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8296 """Returns the status of a block device
8299 if self.op.static or not node:
8302 self.cfg.SetDiskID(dev, node)
8304 result = self.rpc.call_blockdev_find(node, dev)
8308 result.Raise("Can't compute disk status for %s" % instance_name)
8310 status = result.payload
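    # flatten the remote BlockDevStatus payload into the plain tuple format
    # returned to query clients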
8314 return (status.dev_path, status.major, status.minor,
8315 status.sync_percent, status.estimated_time,
8316 status.is_degraded, status.ldisk_status)
8318 def _ComputeDiskStatus(self, instance, snode, dev):
8319 """Compute block device status.
8322 if dev.dev_type in constants.LDS_DRBD:
8323 # we change the snode then (otherwise we use the one passed in)
8324 if dev.logical_id[0] == instance.primary_node:
8325 snode = dev.logical_id[1]
8327 snode = dev.logical_id[0]
8329 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8331 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8334 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8335 for child in dev.children]
8340 "iv_name": dev.iv_name,
8341 "dev_type": dev.dev_type,
8342 "logical_id": dev.logical_id,
8343 "physical_id": dev.physical_id,
8344 "pstatus": dev_pstatus,
8345 "sstatus": dev_sstatus,
8346 "children": dev_children,
8353 def Exec(self, feedback_fn):
8354 """Gather and return data"""
8357 cluster = self.cfg.GetClusterInfo()
8359 for instance in self.wanted_instances:
8360 if not self.op.static:
8361 remote_info = self.rpc.call_instance_info(instance.primary_node,
8363 instance.hypervisor)
8364 remote_info.Raise("Error checking node %s" % instance.primary_node)
8365 remote_info = remote_info.payload
8366 if remote_info and "state" in remote_info:
8369 remote_state = "down"
8372 if instance.admin_up:
8375 config_state = "down"
8377 disks = [self._ComputeDiskStatus(instance, None, device)
8378 for device in instance.disks]
8381 "name": instance.name,
8382 "config_state": config_state,
8383 "run_state": remote_state,
8384 "pnode": instance.primary_node,
8385 "snodes": instance.secondary_nodes,
8387 # this happens to be the same format used for hooks
8388 "nics": _NICListToTuple(self, instance.nics),
8389 "disk_template": instance.disk_template,
8391 "hypervisor": instance.hypervisor,
8392 "network_port": instance.network_port,
8393 "hv_instance": instance.hvparams,
8394 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8395 "be_instance": instance.beparams,
8396 "be_actual": cluster.FillBE(instance),
8397 "os_instance": instance.osparams,
8398 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8399 "serial_no": instance.serial_no,
8400 "mtime": instance.mtime,
8401 "ctime": instance.ctime,
8402 "uuid": instance.uuid,
8405 result[instance.name] = idict
8410 class LUSetInstanceParams(LogicalUnit):
8411 """Modifies an instances's parameters.
8414 HPATH = "instance-modify"
8415 HTYPE = constants.HTYPE_INSTANCE
8416 _OP_REQP = [("instance_name", _TNonEmptyString)]
8418 ("nics", _EmptyList),
8419 ("disks", _EmptyList),
8420 ("beparams", _EmptyDict),
8421 ("hvparams", _EmptyDict),
8422 ("disk_template", None),
8423 ("remote_node", None),
8425 ("force_variant", False),
8431 def CheckArguments(self):
8432 if not (self.op.nics or self.op.disks or self.op.disk_template or
8433 self.op.hvparams or self.op.beparams or self.op.os_name):
8434 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8436 if self.op.hvparams:
8437 _CheckGlobalHvParams(self.op.hvparams)
8441 for disk_op, disk_dict in self.op.disks:
8442 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8443 if disk_op == constants.DDM_REMOVE:
8446 elif disk_op == constants.DDM_ADD:
8449 if not isinstance(disk_op, int):
8450 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8451 if not isinstance(disk_dict, dict):
8452 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8453 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8455 if disk_op == constants.DDM_ADD:
8456 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8457 if mode not in constants.DISK_ACCESS_SET:
8458 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8460 size = disk_dict.get('size', None)
8462 raise errors.OpPrereqError("Required disk parameter size missing",
8466 except (TypeError, ValueError), err:
8467 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8468 str(err), errors.ECODE_INVAL)
8469 disk_dict['size'] = size
8471 # modification of disk
8472 if 'size' in disk_dict:
8473 raise errors.OpPrereqError("Disk size change not possible, use"
8474 " grow-disk", errors.ECODE_INVAL)
8476 if disk_addremove > 1:
8477 raise errors.OpPrereqError("Only one disk add or remove operation"
8478 " supported at a time", errors.ECODE_INVAL)
8480 if self.op.disks and self.op.disk_template is not None:
8481 raise errors.OpPrereqError("Disk template conversion and other disk"
8482 " changes not supported at the same time",
8485 if self.op.disk_template:
8486 _CheckDiskTemplate(self.op.disk_template)
8487 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8488 self.op.remote_node is None):
8489 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8490 " one requires specifying a secondary node",
8495 for nic_op, nic_dict in self.op.nics:
8496 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8497 if nic_op == constants.DDM_REMOVE:
8500 elif nic_op == constants.DDM_ADD:
8503 if not isinstance(nic_op, int):
8504 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8505 if not isinstance(nic_dict, dict):
8506 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8507 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8509 # nic_dict should be a dict
8510 nic_ip = nic_dict.get('ip', None)
8511 if nic_ip is not None:
8512 if nic_ip.lower() == constants.VALUE_NONE:
8513 nic_dict['ip'] = None
8515 if not utils.IsValidIP(nic_ip):
8516 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8519 nic_bridge = nic_dict.get('bridge', None)
8520 nic_link = nic_dict.get('link', None)
8521 if nic_bridge and nic_link:
8522 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8523 " at the same time", errors.ECODE_INVAL)
8524 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8525 nic_dict['bridge'] = None
8526 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8527 nic_dict['link'] = None
8529 if nic_op == constants.DDM_ADD:
8530 nic_mac = nic_dict.get('mac', None)
8532 nic_dict['mac'] = constants.VALUE_AUTO
8534 if 'mac' in nic_dict:
8535 nic_mac = nic_dict['mac']
8536 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8537 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8539 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8540 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8541 " modifying an existing nic",
8544 if nic_addremove > 1:
8545 raise errors.OpPrereqError("Only one NIC add or remove operation"
8546 " supported at a time", errors.ECODE_INVAL)
8548 def ExpandNames(self):
8549 self._ExpandAndLockInstance()
8550 self.needed_locks[locking.LEVEL_NODE] = []
8551 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8553 def DeclareLocks(self, level):
8554 if level == locking.LEVEL_NODE:
8555 self._LockInstancesNodes()
8556 if self.op.disk_template and self.op.remote_node:
8557 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8558 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8560 def BuildHooksEnv(self):
8563 This runs on the master, primary and secondaries.
8567 if constants.BE_MEMORY in self.be_new:
8568 args['memory'] = self.be_new[constants.BE_MEMORY]
8569 if constants.BE_VCPUS in self.be_new:
8570 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8571 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8572 # information at all.
8575 nic_override = dict(self.op.nics)
8576 for idx, nic in enumerate(self.instance.nics):
8577 if idx in nic_override:
8578 this_nic_override = nic_override[idx]
8580 this_nic_override = {}
8581 if 'ip' in this_nic_override:
8582 ip = this_nic_override['ip']
8585 if 'mac' in this_nic_override:
8586 mac = this_nic_override['mac']
8589 if idx in self.nic_pnew:
8590 nicparams = self.nic_pnew[idx]
8592 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8593 mode = nicparams[constants.NIC_MODE]
8594 link = nicparams[constants.NIC_LINK]
8595 args['nics'].append((ip, mac, mode, link))
8596 if constants.DDM_ADD in nic_override:
8597 ip = nic_override[constants.DDM_ADD].get('ip', None)
8598 mac = nic_override[constants.DDM_ADD]['mac']
8599 nicparams = self.nic_pnew[constants.DDM_ADD]
8600 mode = nicparams[constants.NIC_MODE]
8601 link = nicparams[constants.NIC_LINK]
8602 args['nics'].append((ip, mac, mode, link))
8603 elif constants.DDM_REMOVE in nic_override:
8604 del args['nics'][-1]
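      # removing a NIC always drops the last one (see Exec below), so mirror
      # that in the hook environment by dropping the last entry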
8606 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8607 if self.op.disk_template:
8608 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8609 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8612 def CheckPrereq(self):
8613 """Check prerequisites.
8615 This only checks the instance list against the existing names.
8618 # checking the new params on the primary/secondary nodes
8620 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8621 cluster = self.cluster = self.cfg.GetClusterInfo()
8622 assert self.instance is not None, \
8623 "Cannot retrieve locked instance %s" % self.op.instance_name
8624 pnode = instance.primary_node
8625 nodelist = list(instance.all_nodes)
8628 if self.op.os_name and not self.op.force:
8629 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8630 self.op.force_variant)
8631 instance_os = self.op.os_name
8633 instance_os = instance.os
8635 if self.op.disk_template:
8636 if instance.disk_template == self.op.disk_template:
8637 raise errors.OpPrereqError("Instance already has disk template %s" %
8638 instance.disk_template, errors.ECODE_INVAL)
8640 if (instance.disk_template,
8641 self.op.disk_template) not in self._DISK_CONVERSIONS:
8642 raise errors.OpPrereqError("Unsupported disk template conversion from"
8643 " %s to %s" % (instance.disk_template,
8644 self.op.disk_template),
8646 if self.op.disk_template in constants.DTS_NET_MIRROR:
8647 _CheckNodeOnline(self, self.op.remote_node)
8648 _CheckNodeNotDrained(self, self.op.remote_node)
8649 disks = [{"size": d.size} for d in instance.disks]
8650 required = _ComputeDiskSize(self.op.disk_template, disks)
8651 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8652 _CheckInstanceDown(self, instance, "cannot change disk template")
8654 # hvparams processing
8655 if self.op.hvparams:
8656 hv_type = instance.hypervisor
8657 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8658 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8659 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8662 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8663 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8664 self.hv_new = hv_new # the new actual values
8665 self.hv_inst = i_hvdict # the new dict (without defaults)
8667 self.hv_new = self.hv_inst = {}
8669 # beparams processing
8670 if self.op.beparams:
8671 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8673 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8674 be_new = cluster.SimpleFillBE(i_bedict)
8675 self.be_new = be_new # the new actual values
8676 self.be_inst = i_bedict # the new dict (without defaults)
8678 self.be_new = self.be_inst = {}
8680 # osparams processing
8681 if self.op.osparams:
8682 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8683 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8684 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8685 self.os_inst = i_osdict # the new dict (without defaults)
8687 self.os_new = self.os_inst = {}
8691 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8692 mem_check_list = [pnode]
8693 if be_new[constants.BE_AUTO_BALANCE]:
8694 # either we changed auto_balance to yes or it was from before
8695 mem_check_list.extend(instance.secondary_nodes)
8696 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8697 instance.hypervisor)
8698 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8699 instance.hypervisor)
8700 pninfo = nodeinfo[pnode]
8701 msg = pninfo.fail_msg
8703 # Assume the primary node is unreachable and go ahead
8704 self.warn.append("Can't get info from primary node %s: %s" %
8706 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8707 self.warn.append("Node data from primary node %s doesn't contain"
8708 " free memory information" % pnode)
8709 elif instance_info.fail_msg:
8710 self.warn.append("Can't get instance runtime information: %s" %
8711 instance_info.fail_msg)
8713 if instance_info.payload:
8714 current_mem = int(instance_info.payload['memory'])
8716 # Assume instance not running
8717 # (there is a slight race condition here, but it's not very probable,
8718 # and we have no other way to check)
8720 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8721 pninfo.payload['memory_free'])
8723 raise errors.OpPrereqError("This change will prevent the instance"
8724 " from starting, due to %d MB of memory"
8725 " missing on its primary node" % miss_mem,
8728 if be_new[constants.BE_AUTO_BALANCE]:
8729 for node, nres in nodeinfo.items():
8730 if node not in instance.secondary_nodes:
8734 self.warn.append("Can't get info from secondary node %s: %s" %
8736 elif not isinstance(nres.payload.get('memory_free', None), int):
8737 self.warn.append("Secondary node %s didn't return free"
8738 " memory information" % node)
8739 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8740 self.warn.append("Not enough memory to failover instance to"
8741 " secondary node %s" % node)
8746 for nic_op, nic_dict in self.op.nics:
8747 if nic_op == constants.DDM_REMOVE:
8748 if not instance.nics:
8749 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8752 if nic_op != constants.DDM_ADD:
8754 if not instance.nics:
8755 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8756 " no NICs" % nic_op,
8758 if nic_op < 0 or nic_op >= len(instance.nics):
8759 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8761 (nic_op, len(instance.nics) - 1),
8763 old_nic_params = instance.nics[nic_op].nicparams
8764 old_nic_ip = instance.nics[nic_op].ip
8769 update_params_dict = dict([(key, nic_dict[key])
8770 for key in constants.NICS_PARAMETERS
8771 if key in nic_dict])
8773 if 'bridge' in nic_dict:
8774 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
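        # 'bridge' is accepted as a backwards-compatible alias and is folded
        # into the generic 'link' NIC parameter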
8776 new_nic_params = _GetUpdatedParams(old_nic_params,
8778 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8779 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8780 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8781 self.nic_pinst[nic_op] = new_nic_params
8782 self.nic_pnew[nic_op] = new_filled_nic_params
8783 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8785 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8786 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8787 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8789 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8791 self.warn.append(msg)
8793 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8794 if new_nic_mode == constants.NIC_MODE_ROUTED:
8795 if 'ip' in nic_dict:
8796 nic_ip = nic_dict['ip']
8800 raise errors.OpPrereqError('Cannot set the nic ip to None'
8801 ' on a routed nic', errors.ECODE_INVAL)
8802 if 'mac' in nic_dict:
8803 nic_mac = nic_dict['mac']
8805 raise errors.OpPrereqError('Cannot set the nic mac to None',
8807 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8808 # otherwise generate the mac
8809 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8811 # or validate/reserve the current one
8813 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8814 except errors.ReservationError:
8815 raise errors.OpPrereqError("MAC address %s already in use"
8816 " in cluster" % nic_mac,
8817 errors.ECODE_NOTUNIQUE)
8820 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8821 raise errors.OpPrereqError("Disk operations not supported for"
8822 " diskless instances",
8824 for disk_op, _ in self.op.disks:
8825 if disk_op == constants.DDM_REMOVE:
8826 if len(instance.disks) == 1:
8827 raise errors.OpPrereqError("Cannot remove the last disk of"
8828 " an instance", errors.ECODE_INVAL)
8829 _CheckInstanceDown(self, instance, "cannot remove disks")
8831 if (disk_op == constants.DDM_ADD and
8832           len(instance.disks) >= constants.MAX_DISKS):
8833 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8834 " add more" % constants.MAX_DISKS,
8836 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8838 if disk_op < 0 or disk_op >= len(instance.disks):
8839 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8841 (disk_op, len(instance.disks)),
8846 def _ConvertPlainToDrbd(self, feedback_fn):
8847 """Converts an instance from plain to drbd.
8850 feedback_fn("Converting template to drbd")
8851 instance = self.instance
8852 pnode = instance.primary_node
8853 snode = self.op.remote_node
8855 # create a fake disk info for _GenerateDiskTemplate
8856 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8857 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8858 instance.name, pnode, [snode],
8859 disk_info, None, None, 0)
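    # each generated DRBD8 disk takes over the existing data LV as its first
    # child (children[0]) and uses a freshly named LV as its meta device
    # (children[1]); the code below only creates what is missing and then
    # renames the original volumes into place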
8860 info = _GetInstanceInfoText(instance)
8861 feedback_fn("Creating aditional volumes...")
8862 # first, create the missing data and meta devices
8863 for disk in new_disks:
8864 # unfortunately this is... not too nice
8865 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8867 for child in disk.children:
8868 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8869 # at this stage, all new LVs have been created, we can rename the
8871 feedback_fn("Renaming original volumes...")
8872 rename_list = [(o, n.children[0].logical_id)
8873 for (o, n) in zip(instance.disks, new_disks)]
8874 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8875 result.Raise("Failed to rename original LVs")
8877 feedback_fn("Initializing DRBD devices...")
8878 # all child devices are in place, we can now create the DRBD devices
8879 for disk in new_disks:
8880 for node in [pnode, snode]:
8881 f_create = node == pnode
8882 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8884 # at this point, the instance has been modified
8885 instance.disk_template = constants.DT_DRBD8
8886 instance.disks = new_disks
8887 self.cfg.Update(instance, feedback_fn)
8889 # disks are created, waiting for sync
8890 disk_abort = not _WaitForSync(self, instance)
8892 raise errors.OpExecError("There are some degraded disks for"
8893 " this instance, please cleanup manually")
8895 def _ConvertDrbdToPlain(self, feedback_fn):
8896 """Converts an instance from drbd to plain.
8899 instance = self.instance
8900 assert len(instance.secondary_nodes) == 1
8901 pnode = instance.primary_node
8902 snode = instance.secondary_nodes[0]
8903 feedback_fn("Converting template to plain")
8905 old_disks = instance.disks
8906 new_disks = [d.children[0] for d in old_disks]
8908 # copy over size and mode
8909 for parent, child in zip(old_disks, new_disks):
8910 child.size = parent.size
8911 child.mode = parent.mode
8913 # update instance structure
8914 instance.disks = new_disks
8915 instance.disk_template = constants.DT_PLAIN
8916 self.cfg.Update(instance, feedback_fn)
8918 feedback_fn("Removing volumes on the secondary node...")
8919 for disk in old_disks:
8920 self.cfg.SetDiskID(disk, snode)
8921 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8923 self.LogWarning("Could not remove block device %s on node %s,"
8924 " continuing anyway: %s", disk.iv_name, snode, msg)
8926 feedback_fn("Removing unneeded volumes on the primary node...")
8927 for idx, disk in enumerate(old_disks):
8928 meta = disk.children[1]
8929 self.cfg.SetDiskID(meta, pnode)
8930 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8932 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8933 " continuing anyway: %s", idx, pnode, msg)
8936 def Exec(self, feedback_fn):
8937 """Modifies an instance.
8939 All parameters take effect only at the next restart of the instance.
8942 # Process here the warnings from CheckPrereq, as we don't have a
8943 # feedback_fn there.
8944 for warn in self.warn:
8945 feedback_fn("WARNING: %s" % warn)
8948 instance = self.instance
8950 for disk_op, disk_dict in self.op.disks:
8951 if disk_op == constants.DDM_REMOVE:
8952 # remove the last disk
8953 device = instance.disks.pop()
8954 device_idx = len(instance.disks)
8955 for node, disk in device.ComputeNodeTree(instance.primary_node):
8956 self.cfg.SetDiskID(disk, node)
8957 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8959 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8960 " continuing anyway", device_idx, node, msg)
8961 result.append(("disk/%d" % device_idx, "remove"))
8962 elif disk_op == constants.DDM_ADD:
8964 if instance.disk_template == constants.DT_FILE:
8965 file_driver, file_path = instance.disks[0].logical_id
8966 file_path = os.path.dirname(file_path)
8968 file_driver = file_path = None
8969 disk_idx_base = len(instance.disks)
8970 new_disk = _GenerateDiskTemplate(self,
8971 instance.disk_template,
8972 instance.name, instance.primary_node,
8973 instance.secondary_nodes,
8978 instance.disks.append(new_disk)
8979 info = _GetInstanceInfoText(instance)
8981 logging.info("Creating volume %s for instance %s",
8982 new_disk.iv_name, instance.name)
8983 # Note: this needs to be kept in sync with _CreateDisks
8985 for node in instance.all_nodes:
8986 f_create = node == instance.primary_node
8988 _CreateBlockDev(self, node, instance, new_disk,
8989 f_create, info, f_create)
8990 except errors.OpExecError, err:
8991 self.LogWarning("Failed to create volume %s (%s) on"
8993 new_disk.iv_name, new_disk, node, err)
8994 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8995 (new_disk.size, new_disk.mode)))
8997 # change a given disk
8998 instance.disks[disk_op].mode = disk_dict['mode']
8999 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9001 if self.op.disk_template:
9002 r_shut = _ShutdownInstanceDisks(self, instance)
9004 raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9005 " proceed with disk template conversion")
9006 mode = (instance.disk_template, self.op.disk_template)
9008 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9010 self.cfg.ReleaseDRBDMinors(instance.name)
9012 result.append(("disk_template", self.op.disk_template))
9015 for nic_op, nic_dict in self.op.nics:
9016 if nic_op == constants.DDM_REMOVE:
9017 # remove the last nic
9018 del instance.nics[-1]
9019 result.append(("nic.%d" % len(instance.nics), "remove"))
9020 elif nic_op == constants.DDM_ADD:
9021         # mac and bridge should be set by now
9022 mac = nic_dict['mac']
9023 ip = nic_dict.get('ip', None)
9024 nicparams = self.nic_pinst[constants.DDM_ADD]
9025 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9026 instance.nics.append(new_nic)
9027 result.append(("nic.%d" % (len(instance.nics) - 1),
9028 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9029 (new_nic.mac, new_nic.ip,
9030 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9031 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9034 for key in 'mac', 'ip':
9036 setattr(instance.nics[nic_op], key, nic_dict[key])
9037 if nic_op in self.nic_pinst:
9038 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9039 for key, val in nic_dict.iteritems():
9040 result.append(("nic.%s/%d" % (key, nic_op), val))
9043 if self.op.hvparams:
9044 instance.hvparams = self.hv_inst
9045 for key, val in self.op.hvparams.iteritems():
9046 result.append(("hv/%s" % key, val))
9049 if self.op.beparams:
9050 instance.beparams = self.be_inst
9051 for key, val in self.op.beparams.iteritems():
9052 result.append(("be/%s" % key, val))
9056 instance.os = self.op.os_name
9059 if self.op.osparams:
9060 instance.osparams = self.os_inst
9061 for key, val in self.op.osparams.iteritems():
9062 result.append(("os/%s" % key, val))
9064 self.cfg.Update(instance, feedback_fn)
9068 _DISK_CONVERSIONS = {
9069 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9070 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9074 class LUQueryExports(NoHooksLU):
9075 """Query the exports list
9078 _OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
9081 def ExpandNames(self):
9082 self.needed_locks = {}
9083 self.share_locks[locking.LEVEL_NODE] = 1
9084 if not self.op.nodes:
9085 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9087 self.needed_locks[locking.LEVEL_NODE] = \
9088 _GetWantedNodes(self, self.op.nodes)
9090 def Exec(self, feedback_fn):
9091 """Compute the list of all the exported system images.
9094 @return: a dictionary with the structure node->(export-list)
9095 where export-list is a list of the instances exported on
9099 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9100 rpcresult = self.rpc.call_export_list(self.nodes)
9102 for node in rpcresult:
9103 if rpcresult[node].fail_msg:
9104 result[node] = False
9106 result[node] = rpcresult[node].payload
9111 class LUPrepareExport(NoHooksLU):
9112 """Prepares an instance for an export and returns useful information.
9116 ("instance_name", _TNonEmptyString),
9117 ("mode", _TElemOf(constants.EXPORT_MODES)),
9121 def ExpandNames(self):
9122 self._ExpandAndLockInstance()
9124 def CheckPrereq(self):
9125 """Check prerequisites.
9128 instance_name = self.op.instance_name
9130 self.instance = self.cfg.GetInstanceInfo(instance_name)
9131 assert self.instance is not None, \
9132 "Cannot retrieve locked instance %s" % self.op.instance_name
9133 _CheckNodeOnline(self, self.instance.primary_node)
9135 self._cds = _GetClusterDomainSecret()
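    # the cluster domain secret is used below (in Exec) to compute the remote
    # export handshake, HMAC-sign the X509 key name and sign the generated CA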
9137 def Exec(self, feedback_fn):
9138 """Prepares an instance for an export.
9141 instance = self.instance
9143 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9144 salt = utils.GenerateSecret(8)
9146 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9147 result = self.rpc.call_x509_cert_create(instance.primary_node,
9148 constants.RIE_CERT_VALIDITY)
9149 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9151 (name, cert_pem) = result.payload
9153 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9157 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9158 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9160 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9166 class LUExportInstance(LogicalUnit):
9167 """Export an instance to an image in the cluster.
9170 HPATH = "instance-export"
9171 HTYPE = constants.HTYPE_INSTANCE
9173 ("instance_name", _TNonEmptyString),
9174 ("target_node", _TNonEmptyString),
9175 ("shutdown", _TBool),
9176 ("mode", _TElemOf(constants.EXPORT_MODES)),
9179 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9180 ("remove_instance", False),
9181 ("ignore_remove_failures", False),
9182 ("mode", constants.EXPORT_MODE_LOCAL),
9183 ("x509_key_name", None),
9184 ("destination_x509_ca", None),
9188 def CheckArguments(self):
9189 """Check the arguments.
9192 self.x509_key_name = self.op.x509_key_name
9193 self.dest_x509_ca_pem = self.op.destination_x509_ca
9195 if self.op.remove_instance and not self.op.shutdown:
9196 raise errors.OpPrereqError("Can not remove instance without shutting it"
9199 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9200 if not self.x509_key_name:
9201 raise errors.OpPrereqError("Missing X509 key name for encryption",
9204 if not self.dest_x509_ca_pem:
9205 raise errors.OpPrereqError("Missing destination X509 CA",
9208 def ExpandNames(self):
9209 self._ExpandAndLockInstance()
9211 # Lock all nodes for local exports
9212 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9213 # FIXME: lock only instance primary and destination node
9215       # Sad but true, for now we have to lock all nodes, as we don't know where
9216 # the previous export might be, and in this LU we search for it and
9217 # remove it from its current node. In the future we could fix this by:
9218 # - making a tasklet to search (share-lock all), then create the
9219       #   new one, then remove the old one afterwards
9220 # - removing the removal operation altogether
9221 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9223 def DeclareLocks(self, level):
9224 """Last minute lock declaration."""
9225 # All nodes are locked anyway, so nothing to do here.
9227 def BuildHooksEnv(self):
9230 This will run on the master, primary node and target node.
9234 "EXPORT_MODE": self.op.mode,
9235 "EXPORT_NODE": self.op.target_node,
9236 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9237 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9238 # TODO: Generic function for boolean env variables
9239 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9242 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9244 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9246 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9247 nl.append(self.op.target_node)
9251 def CheckPrereq(self):
9252 """Check prerequisites.
9254 This checks that the instance and node names are valid.
9257 instance_name = self.op.instance_name
9259 self.instance = self.cfg.GetInstanceInfo(instance_name)
9260 assert self.instance is not None, \
9261 "Cannot retrieve locked instance %s" % self.op.instance_name
9262 _CheckNodeOnline(self, self.instance.primary_node)
9264 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9265 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9266 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9267 assert self.dst_node is not None
9269 _CheckNodeOnline(self, self.dst_node.name)
9270 _CheckNodeNotDrained(self, self.dst_node.name)
9273 self.dest_disk_info = None
9274 self.dest_x509_ca = None
9276 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9277 self.dst_node = None
9279 if len(self.op.target_node) != len(self.instance.disks):
9280 raise errors.OpPrereqError(("Received destination information for %s"
9281 " disks, but instance %s has %s disks") %
9282 (len(self.op.target_node), instance_name,
9283 len(self.instance.disks)),
9286 cds = _GetClusterDomainSecret()
9288 # Check X509 key name
9290 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9291 except (TypeError, ValueError), err:
9292 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9294 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9295 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9298 # Load and verify CA
9300 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9301 except OpenSSL.crypto.Error, err:
9302 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9303 (err, ), errors.ECODE_INVAL)
9305 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9306 if errcode is not None:
9307 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9308 (msg, ), errors.ECODE_INVAL)
9310 self.dest_x509_ca = cert
9312 # Verify target information
9314 for idx, disk_data in enumerate(self.op.target_node):
9316 (host, port, magic) = \
9317 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9318 except errors.GenericError, err:
9319 raise errors.OpPrereqError("Target info for disk %s: %s" %
9320 (idx, err), errors.ECODE_INVAL)
9322 disk_info.append((host, port, magic))
9324 assert len(disk_info) == len(self.op.target_node)
9325 self.dest_disk_info = disk_info
9328 raise errors.ProgrammerError("Unhandled export mode %r" %
9331 # instance disk type verification
9332 # TODO: Implement export support for file-based disks
9333 for disk in self.instance.disks:
9334 if disk.dev_type == constants.LD_FILE:
9335 raise errors.OpPrereqError("Export not supported for instances with"
9336 " file-based disks", errors.ECODE_INVAL)
9338 def _CleanupExports(self, feedback_fn):
9339 """Removes exports of current instance from all other nodes.
9341 If an instance in a cluster with nodes A..D was exported to node C, its
9342 exports will be removed from the nodes A, B and D.
9345 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9347 nodelist = self.cfg.GetNodeList()
9348 nodelist.remove(self.dst_node.name)
9350 # on one-node clusters nodelist will be empty after the removal
9351 # if we proceed the backup would be removed because OpQueryExports
9352 # substitutes an empty list with the full cluster node list.
9353 iname = self.instance.name
9355 feedback_fn("Removing old exports for instance %s" % iname)
9356 exportlist = self.rpc.call_export_list(nodelist)
9357 for node in exportlist:
9358 if exportlist[node].fail_msg:
9360 if iname in exportlist[node].payload:
9361 msg = self.rpc.call_export_remove(node, iname).fail_msg
9363 self.LogWarning("Could not remove older export for instance %s"
9364 " on node %s: %s", iname, node, msg)
9366 def Exec(self, feedback_fn):
9367 """Export an instance to an image in the cluster.
9370 assert self.op.mode in constants.EXPORT_MODES
9372 instance = self.instance
9373 src_node = instance.primary_node
9375 if self.op.shutdown:
9376 # shutdown the instance, but not the disks
9377 feedback_fn("Shutting down instance %s" % instance.name)
9378 result = self.rpc.call_instance_shutdown(src_node, instance,
9379 self.op.shutdown_timeout)
9380 # TODO: Maybe ignore failures if ignore_remove_failures is set
9381 result.Raise("Could not shutdown instance %s on"
9382 " node %s" % (instance.name, src_node))
9384 # set the disks ID correctly since call_instance_start needs the
9385 # correct drbd minor to create the symlinks
9386 for disk in instance.disks:
9387 self.cfg.SetDiskID(disk, src_node)
9389 activate_disks = (not instance.admin_up)
9392       # Activate the instance disks if we're exporting a stopped instance
9393 feedback_fn("Activating disks for %s" % instance.name)
9394 _StartInstanceDisks(self, instance, None)
9397 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9400 helper.CreateSnapshots()
9402 if (self.op.shutdown and instance.admin_up and
9403 not self.op.remove_instance):
9404 assert not activate_disks
9405 feedback_fn("Starting instance %s" % instance.name)
9406 result = self.rpc.call_instance_start(src_node, instance, None, None)
9407 msg = result.fail_msg
9409 feedback_fn("Failed to start instance: %s" % msg)
9410 _ShutdownInstanceDisks(self, instance)
9411 raise errors.OpExecError("Could not start instance: %s" % msg)
9413 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9414 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9415 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9416 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9417 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9419 (key_name, _, _) = self.x509_key_name
9422 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9425 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9426 key_name, dest_ca_pem,
9431 # Check for backwards compatibility
9432 assert len(dresults) == len(instance.disks)
9433 assert compat.all(isinstance(i, bool) for i in dresults), \
9434 "Not all results are boolean: %r" % dresults
9438 feedback_fn("Deactivating disks for %s" % instance.name)
9439 _ShutdownInstanceDisks(self, instance)
9441 # Remove instance if requested
9442 if self.op.remove_instance:
9443 if not (compat.all(dresults) and fin_resu):
9444 feedback_fn("Not removing instance %s as parts of the export failed" %
9447 feedback_fn("Removing instance %s" % instance.name)
9448 _RemoveInstance(self, feedback_fn, instance,
9449 self.op.ignore_remove_failures)
9451 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9452 self._CleanupExports(feedback_fn)
9454 return fin_resu, dresults
9457 class LURemoveExport(NoHooksLU):
9458 """Remove exports related to the named instance.
9461 _OP_REQP = [("instance_name", _TNonEmptyString)]
9464 def ExpandNames(self):
9465 self.needed_locks = {}
9466 # We need all nodes to be locked in order for RemoveExport to work, but we
9467 # don't need to lock the instance itself, as nothing will happen to it (and
9468 # we can remove exports also for a removed instance)
9469 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9471 def Exec(self, feedback_fn):
9472 """Remove any export.
9475 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9476 # If the instance was not found we'll try with the name that was passed in.
9477 # This will only work if it was an FQDN, though.
9478 fqdn_warn = False
9479 if not instance_name:
9480 fqdn_warn = True
9481 instance_name = self.op.instance_name
9483 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9484 exportlist = self.rpc.call_export_list(locked_nodes)
9485 found = False
9486 for node in exportlist:
9487 msg = exportlist[node].fail_msg
9488 if msg:
9489 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9490 continue
9491 if instance_name in exportlist[node].payload:
9492 found = True
9493 result = self.rpc.call_export_remove(node, instance_name)
9494 msg = result.fail_msg
9495 if msg:
9496 logging.error("Could not remove export for instance %s"
9497 " on node %s: %s", instance_name, node, msg)
9499 if fqdn_warn and not found:
9500 feedback_fn("Export not found. If trying to remove an export belonging"
9501 " to a deleted instance please use its Fully Qualified"
9505 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9508 This is an abstract class which is the parent of all the other tags LUs.
9512 def ExpandNames(self):
9513 self.needed_locks = {}
9514 if self.op.kind == constants.TAG_NODE:
9515 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9516 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9517 elif self.op.kind == constants.TAG_INSTANCE:
9518 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9519 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9521 def CheckPrereq(self):
9522 """Check prerequisites.
9525 if self.op.kind == constants.TAG_CLUSTER:
9526 self.target = self.cfg.GetClusterInfo()
9527 elif self.op.kind == constants.TAG_NODE:
9528 self.target = self.cfg.GetNodeInfo(self.op.name)
9529 elif self.op.kind == constants.TAG_INSTANCE:
9530 self.target = self.cfg.GetInstanceInfo(self.op.name)
9531 else:
9532 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9533 str(self.op.kind), errors.ECODE_INVAL)
9536 class LUGetTags(TagsLU):
9537 """Returns the tags of a given object.
9541 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9542 ("name", _TNonEmptyString),
9546 def Exec(self, feedback_fn):
9547 """Returns the tag list.
9550 return list(self.target.GetTags())
9553 class LUSearchTags(NoHooksLU):
9554 """Searches the tags for a given pattern.
9557 _OP_REQP = [("pattern", _TNonEmptyString)]
9560 def ExpandNames(self):
9561 self.needed_locks = {}
9563 def CheckPrereq(self):
9564 """Check prerequisites.
9566 This checks the pattern passed for validity by compiling it.
9568 """
9569 try:
9570 self.re = re.compile(self.op.pattern)
9571 except re.error, err:
9572 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9573 (self.op.pattern, err), errors.ECODE_INVAL)
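# For example, a pattern such as "web[" fails to compile and is rejected
# here with ECODE_INVAL before any tags are looked at.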
9575 def Exec(self, feedback_fn):
9576 """Returns the tag list.
9580 tgts = [("/cluster", cfg.GetClusterInfo())]
9581 ilist = cfg.GetAllInstancesInfo().values()
9582 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9583 nlist = cfg.GetAllNodesInfo().values()
9584 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9585 results = []
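# results will collect (path, tag) pairs, e.g. ("/instances/inst1", "web"),
# one pair for every tag matching the compiled pattern.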
9586 for path, target in tgts:
9587 for tag in target.GetTags():
9588 if self.re.search(tag):
9589 results.append((path, tag))
9590 return results
9593 class LUAddTags(TagsLU):
9594 """Sets a tag on a given object.
9598 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9599 ("name", _TNonEmptyString),
9600 ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9604 def CheckPrereq(self):
9605 """Check prerequisites.
9607 This checks the type and length of the tag name and value.
9610 TagsLU.CheckPrereq(self)
9611 for tag in self.op.tags:
9612 objects.TaggableObject.ValidateTag(tag)
9614 def Exec(self, feedback_fn):
9618 try:
9619 for tag in self.op.tags:
9620 self.target.AddTag(tag)
9621 except errors.TagError, err:
9622 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9623 self.cfg.Update(self.target, feedback_fn)
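# Update() writes the modified object back into the cluster configuration,
# bumping its serial number, and the change is then distributed as usual.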
9626 class LUDelTags(TagsLU):
9627 """Delete a list of tags from a given object.
9631 ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
9632 ("name", _TNonEmptyString),
9633 ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
9637 def CheckPrereq(self):
9638 """Check prerequisites.
9640 This checks that we have the given tag.
9643 TagsLU.CheckPrereq(self)
9644 for tag in self.op.tags:
9645 objects.TaggableObject.ValidateTag(tag)
9646 del_tags = frozenset(self.op.tags)
9647 cur_tags = self.target.GetTags()
9648 if not del_tags <= cur_tags:
9649 diff_tags = del_tags - cur_tags
9650 diff_names = ["'%s'" % tag for tag in diff_tags]
9652 raise errors.OpPrereqError("Tag(s) %s not found" %
9653 (",".join(diff_names)), errors.ECODE_NOENT)
9655 def Exec(self, feedback_fn):
9656 """Remove the tag from the object.
9659 for tag in self.op.tags:
9660 self.target.RemoveTag(tag)
9661 self.cfg.Update(self.target, feedback_fn)
9664 class LUTestDelay(NoHooksLU):
9665 """Sleep for a specified amount of time.
9667 This LU sleeps on the master and/or nodes for a specified amount of
9668 time.
9672 ("duration", _TFloat),
9673 ("on_master", _TBool),
9674 ("on_nodes", _TListOf(_TNonEmptyString)),
9675 ("repeat", _TPositiveInt)
9682 def ExpandNames(self):
9683 """Expand names and set required locks.
9685 This expands the node list, if any.
9688 self.needed_locks = {}
9689 if self.op.on_nodes:
9690 # _GetWantedNodes can be used here, but is not always appropriate to use
9691 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9692 # more information.
9693 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9694 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9696 def _TestDelay(self):
9697 """Do the actual sleep.
9700 if self.op.on_master:
9701 if not utils.TestDelay(self.op.duration):
9702 raise errors.OpExecError("Error during master delay test")
9703 if self.op.on_nodes:
9704 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9705 for node, node_result in result.items():
9706 node_result.Raise("Failure during rpc call to node %s" % node)
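# This LU backs the "gnt-debug delay" command: the master-side sleep happens
# in this process, while per-node delays go through the test_delay RPC above.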
9708 def Exec(self, feedback_fn):
9709 """Execute the test delay opcode, with the wanted repetitions.
9712 if self.op.repeat == 0:
9713 self._TestDelay()
9714 else:
9715 top_value = self.op.repeat - 1
9716 for i in range(self.op.repeat):
9717 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9718 self._TestDelay()
9721 class IAllocator(object):
9722 """IAllocator framework.
9724 An IAllocator instance has four sets of attributes:
9725 - cfg that is needed to query the cluster
9726 - input data (all members of the _KEYS class attribute are required)
9727 - four buffer attributes (in|out_data|text), that represent the
9728 input (to the external script) in text and data structure format,
9729 and the output from it, again in two formats
9730 - the result variables from the script (success, info, nodes) for
9731 easy usage
9734 # pylint: disable-msg=R0902
9735 # lots of instance attributes
9737 "name", "mem_size", "disks", "disk_template",
9738 "os", "tags", "nics", "vcpus", "hypervisor",
9741 "name", "relocate_from",
9747 def __init__(self, cfg, rpc, mode, **kwargs):
9748 self.cfg = cfg
9749 self.rpc = rpc
9750 # init buffer variables
9751 self.in_text = self.out_text = self.in_data = self.out_data = None
9752 # init all input fields so that pylint is happy
9753 self.mode = mode
9754 self.mem_size = self.disks = self.disk_template = None
9755 self.os = self.tags = self.nics = self.vcpus = None
9756 self.hypervisor = None
9757 self.relocate_from = None
9758 self.name = None
9759 self.evac_nodes = None
9761 self.required_nodes = None
9762 # init result fields
9763 self.success = self.info = self.result = None
9764 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9765 keyset = self._ALLO_KEYS
9766 fn = self._AddNewInstance
9767 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9768 keyset = self._RELO_KEYS
9769 fn = self._AddRelocateInstance
9770 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9771 keyset = self._EVAC_KEYS
9772 fn = self._AddEvacuateNodes
9773 else:
9774 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9775 " IAllocator" % self.mode)
9776 for key in kwargs:
9777 if key not in keyset:
9778 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9779 " IAllocator" % key)
9780 setattr(self, key, kwargs[key])
9782 for key in keyset:
9783 if key not in kwargs:
9784 raise errors.ProgrammerError("Missing input parameter '%s' to"
9785 " IAllocator" % key)
9786 self._BuildInputData(fn)
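# Illustrative use, mirroring LUTestAllocator.Exec below (assumes an
# allocator script named "hail" is installed on the master):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
#                    name="inst1.example.com",
#                    relocate_from=["node2.example.com"])
#   ial.Run("hail")
#   if ial.success:
#     new_secondary = ial.result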
9788 def _ComputeClusterData(self):
9789 """Compute the generic allocator input data.
9791 This is the data that is independent of the actual operation.
9793 """
9794 cfg = self.cfg
9795 cluster_info = cfg.GetClusterInfo()
9797 data = {
9798 "version": constants.IALLOCATOR_VERSION,
9799 "cluster_name": cfg.GetClusterName(),
9800 "cluster_tags": list(cluster_info.GetTags()),
9801 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9802 # we don't have job IDs
9804 iinfo = cfg.GetAllInstancesInfo().values()
9805 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9808 node_results = {}
9809 node_list = cfg.GetNodeList()
9811 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9812 hypervisor_name = self.hypervisor
9813 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9814 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9815 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9816 hypervisor_name = cluster_info.enabled_hypervisors[0]
9818 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9819 hypervisor_name)
9820 node_iinfo = \
9821 self.rpc.call_all_instances_info(node_list,
9822 cluster_info.enabled_hypervisors)
9823 for nname, nresult in node_data.items():
9824 # first fill in static (config-based) values
9825 ninfo = cfg.GetNodeInfo(nname)
9826 pnr = {
9827 "tags": list(ninfo.GetTags()),
9828 "primary_ip": ninfo.primary_ip,
9829 "secondary_ip": ninfo.secondary_ip,
9830 "offline": ninfo.offline,
9831 "drained": ninfo.drained,
9832 "master_candidate": ninfo.master_candidate,
9835 if not (ninfo.offline or ninfo.drained):
9836 nresult.Raise("Can't get data for node %s" % nname)
9837 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9838 nname)
9839 remote_info = nresult.payload
9841 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9842 'vg_size', 'vg_free', 'cpu_total']:
9843 if attr not in remote_info:
9844 raise errors.OpExecError("Node '%s' didn't return attribute"
9845 " '%s'" % (nname, attr))
9846 if not isinstance(remote_info[attr], int):
9847 raise errors.OpExecError("Node '%s' returned invalid value"
9848 " for '%s': %s" %
9849 (nname, attr, remote_info[attr]))
9850 # compute memory used by primary instances
9851 i_p_mem = i_p_up_mem = 0
9852 for iinfo, beinfo in i_list:
9853 if iinfo.primary_node == nname:
9854 i_p_mem += beinfo[constants.BE_MEMORY]
9855 if iinfo.name not in node_iinfo[nname].payload:
9856 i_used_mem = 0
9857 else:
9858 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9859 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9860 remote_info['memory_free'] -= max(0, i_mem_diff)
9862 if iinfo.admin_up:
9863 i_p_up_mem += beinfo[constants.BE_MEMORY]
9865 # compute memory used by instances
9867 "total_memory": remote_info['memory_total'],
9868 "reserved_memory": remote_info['memory_dom0'],
9869 "free_memory": remote_info['memory_free'],
9870 "total_disk": remote_info['vg_size'],
9871 "free_disk": remote_info['vg_free'],
9872 "total_cpus": remote_info['cpu_total'],
9873 "i_pri_memory": i_p_mem,
9874 "i_pri_up_memory": i_p_up_mem,
9878 node_results[nname] = pnr
9879 data["nodes"] = node_results
9883 for iinfo, beinfo in i_list:
9884 nic_data = []
9885 for nic in iinfo.nics:
9886 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9887 nic_dict = {"mac": nic.mac,
9889 "mode": filled_params[constants.NIC_MODE],
9890 "link": filled_params[constants.NIC_LINK],
9892 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9893 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9894 nic_data.append(nic_dict)
9896 "tags": list(iinfo.GetTags()),
9897 "admin_up": iinfo.admin_up,
9898 "vcpus": beinfo[constants.BE_VCPUS],
9899 "memory": beinfo[constants.BE_MEMORY],
9901 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9903 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9904 "disk_template": iinfo.disk_template,
9905 "hypervisor": iinfo.hypervisor,
9907 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9909 instance_data[iinfo.name] = pir
9911 data["instances"] = instance_data
9913 self.in_data = data
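# At this point "data" (stored as self.in_data) describes the whole cluster:
# version, cluster settings, node and instance details; _BuildInputData later
# serializes it together with the mode-specific "request".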
9915 def _AddNewInstance(self):
9916 """Add new instance data to allocator structure.
9918 This in combination with _ComputeClusterData will create the
9919 correct structure needed as input for the allocator.
9921 The checks for the completeness of the opcode must have already been
9922 done.
9925 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9927 if self.disk_template in constants.DTS_NET_MIRROR:
9928 self.required_nodes = 2
9929 else:
9930 self.required_nodes = 1
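# Network-mirrored disk templates (e.g. DRBD) need a primary/secondary node
# pair; all other templates need only a single node.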
9933 "disk_template": self.disk_template,
9936 "vcpus": self.vcpus,
9937 "memory": self.mem_size,
9938 "disks": self.disks,
9939 "disk_space_total": disk_space,
9941 "required_nodes": self.required_nodes,
9945 def _AddRelocateInstance(self):
9946 """Add relocate instance data to allocator structure.
9948 This in combination with _ComputeClusterData will create the
9949 correct structure needed as input for the allocator.
9951 The checks for the completeness of the opcode must have already been
9952 done.
9955 instance = self.cfg.GetInstanceInfo(self.name)
9956 if instance is None:
9957 raise errors.ProgrammerError("Unknown instance '%s' passed to"
9958 " IAllocator" % self.name)
9960 if instance.disk_template not in constants.DTS_NET_MIRROR:
9961 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9964 if len(instance.secondary_nodes) != 1:
9965 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
9968 self.required_nodes = 1
9969 disk_sizes = [{'size': disk.size} for disk in instance.disks]
9970 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9974 "disk_space_total": disk_space,
9975 "required_nodes": self.required_nodes,
9976 "relocate_from": self.relocate_from,
9980 def _AddEvacuateNodes(self):
9981 """Add evacuate nodes data to allocator structure.
9985 "evac_nodes": self.evac_nodes
9989 def _BuildInputData(self, fn):
9990 """Build input data structures.
9993 self._ComputeClusterData()
9995 request = fn()
9996 request["type"] = self.mode
9997 self.in_data["request"] = request
9999 self.in_text = serializer.Dump(self.in_data)
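# serializer.Dump turns the request into the JSON text that is handed to the
# external allocator; self.in_data keeps the structured form around.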
10001 def Run(self, name, validate=True, call_fn=None):
10002 """Run an instance allocator and return the results.
10005 if call_fn is None:
10006 call_fn = self.rpc.call_iallocator_runner
10008 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10009 result.Raise("Failure while running the iallocator script")
10011 self.out_text = result.payload
10012 if validate:
10013 self._ValidateResult()
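# The iallocator runner RPC executes the named allocator script on the master
# node with the serialized input and returns the script's output as the
# payload; the exact invocation lives in the node daemon (backend) code.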
10015 def _ValidateResult(self):
10016 """Process the allocator results.
10018 This will process and if successful save the result in
10019 self.out_data and the other parameters.
10023 rdict = serializer.Load(self.out_text)
10024 except Exception, err:
10025 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10027 if not isinstance(rdict, dict):
10028 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10030 # TODO: remove backwards compatibility in later versions
10031 if "nodes" in rdict and "result" not in rdict:
10032 rdict["result"] = rdict["nodes"]
10035 for key in "success", "info", "result":
10036 if key not in rdict:
10037 raise errors.OpExecError("Can't parse iallocator results:"
10038 " missing key '%s'" % key)
10039 setattr(self, key, rdict[key])
10041 if not isinstance(rdict["result"], list):
10042 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10043 " not a list")
10044 self.out_data = rdict
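# Illustrative well-formed reply from an allocator script:
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}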
10047 class LUTestAllocator(NoHooksLU):
10048 """Run allocator tests.
10050 This LU runs the allocator tests
10054 ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10055 ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
10056 ("name", _TNonEmptyString),
10057 ("nics", _TOr(_TNone, _TListOf(
10058 _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
10059 _TOr(_TNone, _TNonEmptyString))))),
10060 ("disks", _TOr(_TNone, _TList)),
10063 ("hypervisor", None),
10064 ("allocator", None),
10069 def CheckPrereq(self):
10070 """Check prerequisites.
10072 This checks the opcode parameters depending on the direction and mode test.
10075 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10076 for attr in ["mem_size", "disks", "disk_template",
10077 "os", "tags", "nics", "vcpus"]:
10078 if not hasattr(self.op, attr):
10079 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10080 attr, errors.ECODE_INVAL)
10081 iname = self.cfg.ExpandInstanceName(self.op.name)
10082 if iname is not None:
10083 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10084 iname, errors.ECODE_EXISTS)
10085 if not isinstance(self.op.nics, list):
10086 raise errors.OpPrereqError("Invalid parameter 'nics'",
10087 errors.ECODE_INVAL)
10088 if not isinstance(self.op.disks, list):
10089 raise errors.OpPrereqError("Invalid parameter 'disks'",
10090 errors.ECODE_INVAL)
10091 for row in self.op.disks:
10092 if (not isinstance(row, dict) or
10093 "size" not in row or
10094 not isinstance(row["size"], int) or
10095 "mode" not in row or
10096 row["mode"] not in ['r', 'w']):
10097 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10098 " parameter", errors.ECODE_INVAL)
10099 if self.op.hypervisor is None:
10100 self.op.hypervisor = self.cfg.GetHypervisorType()
10101 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10102 fname = _ExpandInstanceName(self.cfg, self.op.name)
10103 self.op.name = fname
10104 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10105 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10106 if not hasattr(self.op, "evac_nodes"):
10107 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10108 " opcode input", errors.ECODE_INVAL)
10109 else:
10110 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10111 self.op.mode, errors.ECODE_INVAL)
10113 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10114 if self.op.allocator is None:
10115 raise errors.OpPrereqError("Missing allocator name",
10116 errors.ECODE_INVAL)
10117 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10118 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10119 self.op.direction, errors.ECODE_INVAL)
10121 def Exec(self, feedback_fn):
10122 """Run the allocator test.
10125 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10126 ial = IAllocator(self.cfg, self.rpc,
10129 mem_size=self.op.mem_size,
10130 disks=self.op.disks,
10131 disk_template=self.op.disk_template,
10135 vcpus=self.op.vcpus,
10136 hypervisor=self.op.hypervisor,
10138 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10139 ial = IAllocator(self.cfg, self.rpc,
10142 relocate_from=list(self.relocate_from),
10144 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10145 ial = IAllocator(self.cfg, self.rpc,
10147 evac_nodes=self.op.evac_nodes)
10149 raise errors.ProgrammerError("Unhandled mode %s in"
10150 " LUTestAllocator.Exec", self.op.mode)
10152 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10153 result = ial.in_text
10154 else:
10155 ial.Run(self.op.allocator, validate=False)
10156 result = ial.out_text
10157 return result
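# With direction "in" the LU only returns the generated allocator input text;
# with direction "out" it runs the named allocator (skipping reply validation)
# and returns the raw output.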