# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have waaay too many lines in this module


import copy
import logging
import time

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd

import ganeti.masterd.instance # pylint: disable-msg=W0611


# Modifiable default values; need to define these here before the
# actual LUs


def _EmptyList():
  """Returns an empty list.

  """
  return []


def _EmptyDict():
  """Returns an empty dict.

  """
  return {}


def _TNotNone(val):
  """Checks if the given value is not None.

  """
  return val is not None


def _TNone(val):
  """Checks if the given value is None.

  """
  return val is None


def _TBool(val):
  """Checks if the given value is a boolean.

  """
  return isinstance(val, bool)


def _TInt(val):
  """Checks if the given value is an integer.

  """
  return isinstance(val, int)


def _TFloat(val):
  """Checks if the given value is a float.

  """
  return isinstance(val, float)


def _TString(val):
  """Checks if the given value is a string.

  """
  return isinstance(val, basestring)


def _TTrue(val):
  """Checks if a given value evaluates to a boolean True value.

  """
  return bool(val)


def _TElemOf(target_list):
  """Builds a function that checks if a given value is a member of a list.

  """
  return lambda val: val in target_list


def _TList(val):
  """Checks if the given value is a list.

  """
  return isinstance(val, list)


def _TDict(val):
  """Checks if the given value is a dictionary.

  """
  return isinstance(val, dict)


def _TAnd(*args):
  """Combine multiple functions using an AND operation.

  """
  def fn(val):
    return compat.all(t(val) for t in args)
  return fn


def _TOr(*args):
  """Combine multiple functions using an OR operation.

  """
  def fn(val):
    return compat.any(t(val) for t in args)
  return fn


# Type aliases

#: a non-empty string
_TNonEmptyString = _TAnd(_TString, _TTrue)


#: a positive (non-negative) integer
_TPositiveInt = _TAnd(_TInt, lambda v: v >= 0)


def _TListOf(my_type):
  """Checks if a given value is a list with all elements of the same type.

  """
  return _TAnd(_TList,
               lambda lst: compat.all(my_type(v) for v in lst))


def _TDictOf(key_type, val_type):
  """Checks a dict type for the type of its key/values.

  """
  return _TAnd(_TDict,
               lambda my_dict: (compat.all(key_type(v) for v in my_dict.keys())
                                and compat.all(val_type(v)
                                               for v in my_dict.values())))
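

# Illustrative composition sketch (not part of the original module): the
# small checkers above are building blocks for _OP_REQP entries.  A
# hypothetical parameter mapping node names to lists of port numbers could
# be validated with:
#
#   _TPortMap = _TDictOf(_TNonEmptyString, _TListOf(_TPositiveInt))
#   _TPortMap({"node1.example.com": [11000, 11001]})  # -> True
#   _TPortMap({"node1.example.com": "11000"})         # -> False (not a list)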


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  @cvar _OP_DEFS: a list of opcode attributes and the default values
      they should get if not already defined

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  _OP_DEFS = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for aname, aval in self._OP_DEFS:
      if not hasattr(self.op, aname):
        if callable(aval):
          dval = aval()
        else:
          dval = aval
        setattr(self.op, aname, dval)

    for attr_name, test in self._OP_REQP:
      if not hasattr(op, attr_name):
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)
      attr_val = getattr(op, attr_name, None)
      if not callable(test):
        raise errors.ProgrammerError("Validation for parameter '%s' failed,"
                                     " given type is not a proper type (%s)" %
                                     (attr_name, test))
      if not test(attr_val):
        logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
                      self.op.OP_ID, attr_name, type(attr_val), attr_val)
        raise errors.OpPrereqError("Parameter '%s' has invalid type" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separate is better because:

      - ExpandNames is left purely as a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase

    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
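
  # Illustrative sketch (not part of the original file): a typical concurrent
  # LU combines the two helpers above, declaring instance locks in
  # ExpandNames and recalculating node locks in DeclareLocks:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()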


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
                                 " non-empty list of nodes, whose names are to"
                                 " be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
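
# Illustrative sketch (not part of the original file): overriding one key
# and resetting another to its cluster default:
#
#   old = {"root_path": "/dev/sda1", "kernel_path": "/boot/vmlinuz"}
#   upd = {"root_path": "/dev/vda1", "kernel_path": constants.VALUE_DEFAULT}
#   _GetUpdatedParams(old, upd)
#   # -> {"root_path": "/dev/vda1"}; kernel_path reverts to the default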


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
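
# Illustrative sketch (not part of the original file): a query LU typically
# validates self.op.output_fields in CheckArguments or CheckPrereq:
#
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("ctotal", "mfree"),
#                      selected=self.op.output_fields)
#
# Any selected field outside both sets raises OpPrereqError (ECODE_INVAL).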


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _RequireFileStorage():
  """Checks that file storage is enabled.

  @raise errors.OpPrereqError: when file storage is disabled

  """
  if not constants.ENABLE_FILE_STORAGE:
    raise errors.OpPrereqError("File storage disabled at configure time",
                               errors.ECODE_INVAL)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
  if template == constants.DT_FILE:
    _RequireFileStorage()


def _CheckStorageType(storage_type):
  """Ensure a given storage type is valid.

  """
  if storage_type not in constants.VALID_STORAGE_TYPES:
    raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
                               errors.ECODE_INVAL)
  if storage_type == constants.ST_FILE:
    _RequireFileStorage()
  return True


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
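
# Illustrative sketch (not part of the original file): callers pass a short
# reason that is embedded in the error message, e.g.:
#
#   _CheckInstanceDown(self, instance, "cannot reinstall")
#   # -> OpPrereqError("Instance inst1 is running, cannot reinstall")
#   #    if the instance is found running on its primary node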


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
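
# Illustrative sketch (not part of the original file; values are made up):
# a single-NIC, single-disk instance yields an environment like
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1", [], "debian",
#                               True, 128, 1,
#                               [("198.51.100.4", "aa:00:00:11:22:33",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               constants.DT_PLAIN, [(10240, "rw")],
#                               {}, {}, "xen-pvm")
#   env["INSTANCE_STATUS"]      # -> "up"
#   env["INSTANCE_NIC0_BRIDGE"] # -> "xen-br0"
#
# and the hooks runner later prefixes every key with GANETI_.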


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
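
# Illustrative arithmetic (not part of the original file): on a 3-node
# cluster with candidate_pool_size 10, GetMasterCandidateStats typically
# returns mc_now == mc_should == 3; the node being added raises the target
# to min(3 + 1, 10) == 4, so mc_now < mc_should and the node promotes itself.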


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
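
# Illustrative sketch (not part of the original file; OS and variant names
# are made up): for an OS whose supported_variants is ["squeeze", "wheezy"],
#
#   _CheckOSVariant(os_obj, "debootstrap+squeeze")  # passes
#   _CheckOSVariant(os_obj, "debootstrap")     # "OS name must include a variant"
#   _CheckOSVariant(os_obj, "debootstrap+sid") # "Unsupported OS variant"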


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = [
    ("skip_checks", _TListOf(_TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
    ("verbose", _TBool),
    ("error_codes", _TBool),
    ("debug_simulate_errors", _TBool),
    ]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dict of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call failed (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS

    """
    def __init__(self, offline=False, name=None):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, ninfo, nresult):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM data.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pvs
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_image):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node, n_img in node_image.items():
      for o_inst in n_img.instances:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate failovers"
                      " should peer node %s fail", prinode)

  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
                       master_files):
    """Verifies and computes the node required file checksums.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param master_files: list of files that only masters should have

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_cksum = nresult.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if test:
      return

    for file_name in file_list:
      node_is_mc = ninfo.master_candidate
      must_have = (file_name not in master_files) or node_is_mc
      # missing
      test1 = file_name not in remote_cksum
      # invalid checksum
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
      # existing and good
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
      # not candidate and this is not a must-have file
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
      # all good, except non-master/non-must have combination
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", f_param, b_param)]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s %s differs from reference node %s: %s vs. %s",
                 kind, os_name, base.name,
                 utils.CommaJoin(a), utils.CommaJoin(b))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes
1935 def Exec(self, feedback_fn):
1936 """Verify integrity of cluster, performing various test on nodes.
1940 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1941 verbose = self.op.verbose
1942 self._feedback_fn = feedback_fn
1943 feedback_fn("* Verifying global settings")
1944 for msg in self.cfg.VerifyConfig():
1945 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1947 # Check the cluster certificates
1948 for cert_filename in constants.ALL_CERT_FILES:
1949 (errcode, msg) = _VerifyCertificate(cert_filename)
1950 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1952 vg_name = self.cfg.GetVGName()
1953 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1954 cluster = self.cfg.GetClusterInfo()
1955 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1956 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1957 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1958 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1959 for iname in instancelist)
1960 i_non_redundant = [] # Non redundant instances
1961 i_non_a_balanced = [] # Non auto-balanced instances
1962 n_offline = 0 # Count of offline nodes
1963 n_drained = 0 # Count of nodes being drained
1964 node_vol_should = {}
1966 # FIXME: verify OS list
1967 # do local checksums
1968 master_files = [constants.CLUSTER_CONF_FILE]
1969 master_node = self.master_node = self.cfg.GetMasterNode()
1970 master_ip = self.cfg.GetMasterIP()
1972 file_names = ssconf.SimpleStore().GetFileList()
1973 file_names.extend(constants.ALL_CERT_FILES)
1974 file_names.extend(master_files)
1975 if cluster.modify_etc_hosts:
1976 file_names.append(constants.ETC_HOSTS)
1978 local_checksums = utils.FingerprintFiles(file_names)
1980 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1981 node_verify_param = {
1982 constants.NV_FILELIST: file_names,
1983 constants.NV_NODELIST: [node.name for node in nodeinfo
1984 if not node.offline],
1985 constants.NV_HYPERVISOR: hypervisors,
1986 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1987 node.secondary_ip) for node in nodeinfo
1988 if not node.offline],
1989 constants.NV_INSTANCELIST: hypervisors,
1990 constants.NV_VERSION: None,
1991 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1992 constants.NV_NODESETUP: None,
1993 constants.NV_TIME: None,
1994 constants.NV_MASTERIP: (master_node, master_ip),
1995 constants.NV_OSLIST: None,
1998 if vg_name is not None:
1999 node_verify_param[constants.NV_VGLIST] = None
2000 node_verify_param[constants.NV_LVLIST] = vg_name
2001 node_verify_param[constants.NV_PVLIST] = [vg_name]
2002 node_verify_param[constants.NV_DRBDLIST] = None
2004 # Build our expected cluster state
2005 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2007 for node in nodeinfo)
2009 for instance in instancelist:
2010 inst_config = instanceinfo[instance]
2012 for nname in inst_config.all_nodes:
2013 if nname not in node_image:
2015 gnode = self.NodeImage(name=nname)
2017 node_image[nname] = gnode
2019 inst_config.MapLVsByNode(node_vol_should)
2021 pnode = inst_config.primary_node
2022 node_image[pnode].pinst.append(instance)
2024 for snode in inst_config.secondary_nodes:
2025 nimg = node_image[snode]
2026 nimg.sinst.append(instance)
2027 if pnode not in nimg.sbp:
2028 nimg.sbp[pnode] = []
2029 nimg.sbp[pnode].append(instance)
2031 # At this point, we have the in-memory data structures complete,
2032 # except for the runtime information, which we'll gather next
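# Illustrative sketch (instance/node names hypothetical): for an instance
# "inst1" with primary "node1" and secondary "node2", the loops above leave:
#   node_image["node1"].pinst == ["inst1"]
#   node_image["node2"].sinst == ["inst1"]
#   node_image["node2"].sbp == {"node1": ["inst1"]}  # secondaries by primary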
2034 # Due to the way our RPC system works, exact response times cannot be
2035 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2036 time before and after executing the request, we can at least have a time window.
2038 nvinfo_starttime = time.time()
2039 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2040 self.cfg.GetClusterName())
2041 nvinfo_endtime = time.time()
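# Sketch of how the window is used: a timestamp a node returns for NV_TIME is
# later compared by _VerifyNodeTime against [nvinfo_starttime, nvinfo_endtime]
# (modulo some allowed drift, an assumption here); a clock outside that window
# indicates node time skew.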
2043 all_drbd_map = self.cfg.ComputeDRBDMap()
2045 feedback_fn("* Verifying node status")
2049 for node_i in nodeinfo:
2051 nimg = node_image[node]
2055 feedback_fn("* Skipping offline node %s" % (node,))
2059 if node == master_node:
2061 elif node_i.master_candidate:
2062 ntype = "master candidate"
2063 elif node_i.drained:
2069 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2071 msg = all_nvinfo[node].fail_msg
2072 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2074 nimg.rpc_fail = True
2077 nresult = all_nvinfo[node].payload
2079 nimg.call_ok = self._VerifyNode(node_i, nresult)
2080 self._VerifyNodeNetwork(node_i, nresult)
2081 self._VerifyNodeLVM(node_i, nresult, vg_name)
2082 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2084 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
2085 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2087 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2088 self._UpdateNodeInstances(node_i, nresult, nimg)
2089 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2090 self._UpdateNodeOS(node_i, nresult, nimg)
2091 if not nimg.os_fail:
2092 if refos_img is None:
2094 self._VerifyNodeOS(node_i, nimg, refos_img)
2096 feedback_fn("* Verifying instance status")
2097 for instance in instancelist:
2099 feedback_fn("* Verifying instance %s" % instance)
2100 inst_config = instanceinfo[instance]
2101 self._VerifyInstance(instance, inst_config, node_image)
2102 inst_nodes_offline = []
2104 pnode = inst_config.primary_node
2105 pnode_img = node_image[pnode]
2106 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2107 self.ENODERPC, pnode, "instance %s, connection to"
2108 " primary node failed", instance)
2110 if pnode_img.offline:
2111 inst_nodes_offline.append(pnode)
2113 # If the instance is non-redundant we cannot survive losing its primary
2114 # node, so we are not N+1 compliant. On the other hand we have no disk
2115 # templates with more than one secondary so that situation is not well supported either.
2117 # FIXME: does not support file-backed instances
2118 if not inst_config.secondary_nodes:
2119 i_non_redundant.append(instance)
2120 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2121 instance, "instance has multiple secondary nodes: %s",
2122 utils.CommaJoin(inst_config.secondary_nodes),
2123 code=self.ETYPE_WARNING)
2125 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2126 i_non_a_balanced.append(instance)
2128 for snode in inst_config.secondary_nodes:
2129 s_img = node_image[snode]
2130 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2131 "instance %s, connection to secondary node failed", instance)
2134 inst_nodes_offline.append(snode)
2136 # warn that the instance lives on offline nodes
2137 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2138 "instance lives on offline node(s) %s",
2139 utils.CommaJoin(inst_nodes_offline))
2140 # ... or ghost nodes
2141 for node in inst_config.all_nodes:
2142 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2143 "instance lives on ghost node %s", node)
2145 feedback_fn("* Verifying orphan volumes")
2146 self._VerifyOrphanVolumes(node_vol_should, node_image)
2148 feedback_fn("* Verifying orphan instances")
2149 self._VerifyOrphanInstances(instancelist, node_image)
2151 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2152 feedback_fn("* Verifying N+1 Memory redundancy")
2153 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2155 feedback_fn("* Other Notes")
2157 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2158 % len(i_non_redundant))
2160 if i_non_a_balanced:
2161 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2162 % len(i_non_a_balanced))
2165 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2168 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2172 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2173 """Analyze the post-hooks' result
2175 This method analyses the hook result, handles it, and sends some
2176 nicely-formatted feedback back to the user.
2178 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2179 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2180 @param hooks_results: the results of the multi-node hooks rpc call
2181 @param feedback_fn: function used to send feedback back to the caller
2182 @param lu_result: previous Exec result
2183 @return: the new Exec result, based on the previous result
2187 # We only really run POST phase hooks, and are only interested in
2189 if phase == constants.HOOKS_PHASE_POST:
2190 # Used to change hooks' output to proper indentation
2191 indent_re = re.compile('^', re.M)
2192 feedback_fn("* Hooks Results")
2193 assert hooks_results, "invalid result from hooks"
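# Shape sketch (node name hypothetical): hooks_results maps node names to RPC
# results whose .payload is a list of (script, hook-result code, output)
# tuples, which is exactly what the loop below unpacks.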
2195 for node_name in hooks_results:
2196 res = hooks_results[node_name]
2198 test = msg and not res.offline
2199 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2200 "Communication failure in hooks execution: %s", msg)
2201 if res.offline or msg:
2202 # No need to investigate payload if node is offline or gave an error.
2203 # override manually lu_result here as _ErrorIf only
2204 # overrides self.bad
2207 for script, hkr, output in res.payload:
2208 test = hkr == constants.HKR_FAIL
2209 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2210 "Script %s failed, output:", script)
2212 output = indent_re.sub(' ', output)
2213 feedback_fn("%s" % output)
2219 class LUVerifyDisks(NoHooksLU):
2220 """Verifies the cluster disks status.
2226 def ExpandNames(self):
2227 self.needed_locks = {
2228 locking.LEVEL_NODE: locking.ALL_SET,
2229 locking.LEVEL_INSTANCE: locking.ALL_SET,
2231 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2233 def Exec(self, feedback_fn):
2234 """Verify integrity of cluster disks.
2236 @rtype: tuple of three items
2237 @return: a tuple of (dict of node-to-node_error, list of instances
2238 which need activate-disks, dict of instance: (node, volume) for missing volumes)
2242 result = res_nodes, res_instances, res_missing = {}, [], {}
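# Illustrative (hypothetical names) final value of ``result``:
#   ({"node3": "rpc error text"},           # nodes we could not query
#    ["inst2"],                             # instances needing activate-disks
#    {"inst5": [("node1", "xenvg/lv0")]})   # missing (node, volume) pairs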
2244 vg_name = self.cfg.GetVGName()
2245 nodes = utils.NiceSort(self.cfg.GetNodeList())
2246 instances = [self.cfg.GetInstanceInfo(name)
2247 for name in self.cfg.GetInstanceList()]
2250 for inst in instances:
2252 if (not inst.admin_up or
2253 inst.disk_template not in constants.DTS_NET_MIRROR):
2255 inst.MapLVsByNode(inst_lvs)
2256 # transform { iname: {node: [vol,],},} to {(node, vol): inst}
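# e.g. (hypothetical layout) an instance "inst1" with one LV on "node1"
# contributes the entry {("node1", "xenvg/lv_data"): <Instance inst1>}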
2257 for node, vol_list in inst_lvs.iteritems():
2258 for vol in vol_list:
2259 nv_dict[(node, vol)] = inst
2264 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2268 node_res = node_lvs[node]
2269 if node_res.offline:
2271 msg = node_res.fail_msg
2273 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2274 res_nodes[node] = msg
2277 lvs = node_res.payload
2278 for lv_name, (_, _, lv_online) in lvs.items():
2279 inst = nv_dict.pop((node, lv_name), None)
2280 if (not lv_online and inst is not None
2281 and inst.name not in res_instances):
2282 res_instances.append(inst.name)
2284 # any leftover items in nv_dict are missing LVs; let's arrange the data better
2286 for key, inst in nv_dict.iteritems():
2287 if inst.name not in res_missing:
2288 res_missing[inst.name] = []
2289 res_missing[inst.name].append(key)
2294 class LURepairDiskSizes(NoHooksLU):
2295 """Verifies the cluster disks sizes.
2298 _OP_REQP = [("instances", _TListOf(_TNonEmptyString))]
2301 def ExpandNames(self):
2302 if self.op.instances:
2303 self.wanted_names = []
2304 for name in self.op.instances:
2305 full_name = _ExpandInstanceName(self.cfg, name)
2306 self.wanted_names.append(full_name)
2307 self.needed_locks = {
2308 locking.LEVEL_NODE: [],
2309 locking.LEVEL_INSTANCE: self.wanted_names,
2311 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2313 self.wanted_names = None
2314 self.needed_locks = {
2315 locking.LEVEL_NODE: locking.ALL_SET,
2316 locking.LEVEL_INSTANCE: locking.ALL_SET,
2318 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2320 def DeclareLocks(self, level):
2321 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2322 self._LockInstancesNodes(primary_only=True)
2324 def CheckPrereq(self):
2325 """Check prerequisites.
2327 This only checks the optional instance list against the existing names.
2330 if self.wanted_names is None:
2331 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2333 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2334 in self.wanted_names]
2336 def _EnsureChildSizes(self, disk):
2337 """Ensure children of the disk have the needed disk size.
2339 This is valid mainly for DRBD8 and fixes an issue where the
2340 children have a smaller disk size than the parent.
2342 @param disk: an L{ganeti.objects.Disk} object
2345 if disk.dev_type == constants.LD_DRBD8:
2346 assert disk.children, "Empty children for DRBD8?"
2347 fchild = disk.children[0]
2348 mismatch = fchild.size < disk.size
2350 self.LogInfo("Child disk has size %d, parent %d, fixing",
2351 fchild.size, disk.size)
2352 fchild.size = disk.size
2354 # and we recurse on this child only, not on the metadev
2355 return self._EnsureChildSizes(fchild) or mismatch
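# Usage sketch (sizes hypothetical): a DRBD8 disk of size 10240 whose data
# child reports 10112 gets the child grown to 10240 and True returned, which
# lets Exec() below persist the fix via cfg.Update.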
2359 def Exec(self, feedback_fn):
2360 """Verify the size of cluster disks.
2363 # TODO: check child disks too
2364 # TODO: check differences in size between primary/secondary nodes
2366 for instance in self.wanted_instances:
2367 pnode = instance.primary_node
2368 if pnode not in per_node_disks:
2369 per_node_disks[pnode] = []
2370 for idx, disk in enumerate(instance.disks):
2371 per_node_disks[pnode].append((instance, idx, disk))
2374 for node, dskl in per_node_disks.items():
2375 newl = [v[2].Copy() for v in dskl]
2377 self.cfg.SetDiskID(dsk, node)
2378 result = self.rpc.call_blockdev_getsizes(node, newl)
2380 self.LogWarning("Failure in blockdev_getsizes call to node"
2381 " %s, ignoring", node)
2383 if len(result.data) != len(dskl):
2384 self.LogWarning("Invalid result from node %s, ignoring node results",
2387 for ((instance, idx, disk), size) in zip(dskl, result.data):
2389 self.LogWarning("Disk %d of instance %s did not return size"
2390 " information, ignoring", idx, instance.name)
2392 if not isinstance(size, (int, long)):
2393 self.LogWarning("Disk %d of instance %s did not return valid"
2394 " size information, ignoring", idx, instance.name)
2397 if size != disk.size:
2398 self.LogInfo("Disk %d of instance %s has mismatched size,"
2399 " correcting: recorded %d, actual %d", idx,
2400 instance.name, disk.size, size)
2402 self.cfg.Update(instance, feedback_fn)
2403 changed.append((instance.name, idx, size))
2404 if self._EnsureChildSizes(disk):
2405 self.cfg.Update(instance, feedback_fn)
2406 changed.append((instance.name, idx, disk.size))
2410 class LURenameCluster(LogicalUnit):
2411 """Rename the cluster.
2414 HPATH = "cluster-rename"
2415 HTYPE = constants.HTYPE_CLUSTER
2416 _OP_REQP = [("name", _TNonEmptyString)]
2418 def BuildHooksEnv(self):
2423 "OP_TARGET": self.cfg.GetClusterName(),
2424 "NEW_NAME": self.op.name,
2426 mn = self.cfg.GetMasterNode()
2427 all_nodes = self.cfg.GetNodeList()
2428 return env, [mn], all_nodes
2430 def CheckPrereq(self):
2431 """Verify that the passed name is a valid one.
2434 hostname = utils.GetHostInfo(self.op.name)
2436 new_name = hostname.name
2437 self.ip = new_ip = hostname.ip
2438 old_name = self.cfg.GetClusterName()
2439 old_ip = self.cfg.GetMasterIP()
2440 if new_name == old_name and new_ip == old_ip:
2441 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2442 " cluster has changed",
2444 if new_ip != old_ip:
2445 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2446 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2447 " reachable on the network. Aborting." %
2448 new_ip, errors.ECODE_NOTUNIQUE)
2450 self.op.name = new_name
2452 def Exec(self, feedback_fn):
2453 """Rename the cluster.
2456 clustername = self.op.name
2459 # shutdown the master IP
2460 master = self.cfg.GetMasterNode()
2461 result = self.rpc.call_node_stop_master(master, False)
2462 result.Raise("Could not disable the master role")
2465 cluster = self.cfg.GetClusterInfo()
2466 cluster.cluster_name = clustername
2467 cluster.master_ip = ip
2468 self.cfg.Update(cluster, feedback_fn)
2470 # update the known hosts file
2471 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2472 node_list = self.cfg.GetNodeList()
2474 node_list.remove(master)
2477 result = self.rpc.call_upload_file(node_list,
2478 constants.SSH_KNOWN_HOSTS_FILE)
2479 for to_node, to_result in result.iteritems():
2480 msg = to_result.fail_msg
2482 msg = ("Copy of file %s to node %s failed: %s" %
2483 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2484 self.proc.LogWarning(msg)
2487 result = self.rpc.call_node_start_master(master, False, False)
2488 msg = result.fail_msg
2490 self.LogWarning("Could not re-enable the master role on"
2491 " the master, please restart manually: %s", msg)
2494 def _RecursiveCheckIfLVMBased(disk):
2495 """Check if the given disk or its children are lvm-based.
2497 @type disk: L{objects.Disk}
2498 @param disk: the disk to check
2500 @return: boolean indicating whether a LD_LV dev_type was found or not
2504 for chdisk in disk.children:
2505 if _RecursiveCheckIfLVMBased(chdisk):
2507 return disk.dev_type == constants.LD_LV
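# A minimal sketch of the recursion above: for a DRBD8 disk whose two
# children are LV-backed, the call descends into a child, hits an LD_LV
# dev_type and returns True; a purely file-backed disk would return False.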
2510 class LUSetClusterParams(LogicalUnit):
2511 """Change the parameters of the cluster.
2514 HPATH = "cluster-modify"
2515 HTYPE = constants.HTYPE_CLUSTER
2517 ("hvparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2518 ("os_hvp", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2519 ("osparams", _TOr(_TDictOf(_TNonEmptyString, _TDict), _TNone)),
2520 ("enabled_hypervisors",
2521 _TOr(_TAnd(_TListOf(_TElemOf(constants.HYPER_TYPES)), _TTrue), _TNone)),
2524 ("candidate_pool_size", None),
2527 ("remove_uids", None),
2534 def CheckArguments(self):
2538 if self.op.candidate_pool_size is not None:
2540 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2541 except (ValueError, TypeError), err:
2542 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2543 str(err), errors.ECODE_INVAL)
2544 if self.op.candidate_pool_size < 1:
2545 raise errors.OpPrereqError("At least one master candidate needed",
2548 _CheckBooleanOpField(self.op, "maintain_node_health")
2550 if self.op.uid_pool:
2551 uidpool.CheckUidPool(self.op.uid_pool)
2553 if self.op.add_uids:
2554 uidpool.CheckUidPool(self.op.add_uids)
2556 if self.op.remove_uids:
2557 uidpool.CheckUidPool(self.op.remove_uids)
2559 def ExpandNames(self):
2560 # FIXME: in the future maybe other cluster params won't require checking on
2561 # all nodes to be modified.
2562 self.needed_locks = {
2563 locking.LEVEL_NODE: locking.ALL_SET,
2565 self.share_locks[locking.LEVEL_NODE] = 1
2567 def BuildHooksEnv(self):
2572 "OP_TARGET": self.cfg.GetClusterName(),
2573 "NEW_VG_NAME": self.op.vg_name,
2575 mn = self.cfg.GetMasterNode()
2576 return env, [mn], [mn]
2578 def CheckPrereq(self):
2579 """Check prerequisites.
2581 This checks whether the given params don't conflict and
2582 if the given volume group is valid.
2585 if self.op.vg_name is not None and not self.op.vg_name:
2586 instances = self.cfg.GetAllInstancesInfo().values()
2587 for inst in instances:
2588 for disk in inst.disks:
2589 if _RecursiveCheckIfLVMBased(disk):
2590 raise errors.OpPrereqError("Cannot disable lvm storage while"
2591 " lvm-based instances exist",
2594 node_list = self.acquired_locks[locking.LEVEL_NODE]
2596 # if vg_name not None, checks given volume group on all nodes
2598 vglist = self.rpc.call_vg_list(node_list)
2599 for node in node_list:
2600 msg = vglist[node].fail_msg
2602 # ignoring down node
2603 self.LogWarning("Error while gathering data on node %s"
2604 " (ignoring node): %s", node, msg)
2606 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2608 constants.MIN_VG_SIZE)
2610 raise errors.OpPrereqError("Error on node '%s': %s" %
2611 (node, vgstatus), errors.ECODE_ENVIRON)
2613 self.cluster = cluster = self.cfg.GetClusterInfo()
2614 # validate params changes
2615 if self.op.beparams:
2616 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2617 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2619 if self.op.nicparams:
2620 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2621 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2622 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2625 # check all instances for consistency
2626 for instance in self.cfg.GetAllInstancesInfo().values():
2627 for nic_idx, nic in enumerate(instance.nics):
2628 params_copy = copy.deepcopy(nic.nicparams)
2629 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2631 # check parameter syntax
2633 objects.NIC.CheckParameterSyntax(params_filled)
2634 except errors.ConfigurationError, err:
2635 nic_errors.append("Instance %s, nic/%d: %s" %
2636 (instance.name, nic_idx, err))
2638 # if we're moving instances to routed, check that they have an ip
2639 target_mode = params_filled[constants.NIC_MODE]
2640 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2641 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2642 (instance.name, nic_idx))
2644 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2645 "\n".join(nic_errors))
2647 # hypervisor list/parameters
2648 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2649 if self.op.hvparams:
2650 for hv_name, hv_dict in self.op.hvparams.items():
2651 if hv_name not in self.new_hvparams:
2652 self.new_hvparams[hv_name] = hv_dict
2654 self.new_hvparams[hv_name].update(hv_dict)
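# Merge sketch (values hypothetical): with existing cluster hvparams
#   {"xen-pvm": {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}}
# an op passing {"xen-pvm": {"root_path": "/dev/sda1"}} only replaces
# root_path; kernel_path is preserved by the dict.update() above.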
2656 # os hypervisor parameters
2657 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2659 for os_name, hvs in self.op.os_hvp.items():
2660 if os_name not in self.new_os_hvp:
2661 self.new_os_hvp[os_name] = hvs
2663 for hv_name, hv_dict in hvs.items():
2664 if hv_name not in self.new_os_hvp[os_name]:
2665 self.new_os_hvp[os_name][hv_name] = hv_dict
2667 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2670 self.new_osp = objects.FillDict(cluster.osparams, {})
2671 if self.op.osparams:
2672 for os_name, osp in self.op.osparams.items():
2673 if os_name not in self.new_osp:
2674 self.new_osp[os_name] = {}
2676 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2679 if not self.new_osp[os_name]:
2680 # we removed all parameters
2681 del self.new_osp[os_name]
2683 # check the parameter validity (remote check)
2684 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2685 os_name, self.new_osp[os_name])
2687 # changes to the hypervisor list
2688 if self.op.enabled_hypervisors is not None:
2689 self.hv_list = self.op.enabled_hypervisors
2690 for hv in self.hv_list:
2691 # if the hypervisor doesn't already exist in the cluster
2692 # hvparams, we initialize it to empty, and then (in both
2693 # cases) we make sure to fill the defaults, as we might not
2694 # have a complete defaults list if the hypervisor wasn't enabled before
2696 if hv not in new_hvp:
2698 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2699 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2701 self.hv_list = cluster.enabled_hypervisors
2703 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2704 # either the enabled list has changed, or the parameters have, validate
2705 for hv_name, hv_params in self.new_hvparams.items():
2706 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2707 (self.op.enabled_hypervisors and
2708 hv_name in self.op.enabled_hypervisors)):
2709 # either this is a new hypervisor, or its parameters have changed
2710 hv_class = hypervisor.GetHypervisor(hv_name)
2711 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2712 hv_class.CheckParameterSyntax(hv_params)
2713 _CheckHVParams(self, node_list, hv_name, hv_params)
2716 # no need to check any newly-enabled hypervisors, since the
2717 # defaults have already been checked in the above code-block
2718 for os_name, os_hvp in self.new_os_hvp.items():
2719 for hv_name, hv_params in os_hvp.items():
2720 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2721 # we need to fill in the new os_hvp on top of the actual hvparams
2722 cluster_defaults = self.new_hvparams.get(hv_name, {})
2723 new_osp = objects.FillDict(cluster_defaults, hv_params)
2724 hv_class = hypervisor.GetHypervisor(hv_name)
2725 hv_class.CheckParameterSyntax(new_osp)
2726 _CheckHVParams(self, node_list, hv_name, new_osp)
2729 def Exec(self, feedback_fn):
2730 """Change the parameters of the cluster.
2733 if self.op.vg_name is not None:
2734 new_volume = self.op.vg_name
2737 if new_volume != self.cfg.GetVGName():
2738 self.cfg.SetVGName(new_volume)
2740 feedback_fn("Cluster LVM configuration already in desired"
2741 " state, not changing")
2742 if self.op.hvparams:
2743 self.cluster.hvparams = self.new_hvparams
2745 self.cluster.os_hvp = self.new_os_hvp
2746 if self.op.enabled_hypervisors is not None:
2747 self.cluster.hvparams = self.new_hvparams
2748 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2749 if self.op.beparams:
2750 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2751 if self.op.nicparams:
2752 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2753 if self.op.osparams:
2754 self.cluster.osparams = self.new_osp
2756 if self.op.candidate_pool_size is not None:
2757 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2758 # we need to update the pool size here, otherwise the save will fail
2759 _AdjustCandidatePool(self, [])
2761 if self.op.maintain_node_health is not None:
2762 self.cluster.maintain_node_health = self.op.maintain_node_health
2764 if self.op.add_uids is not None:
2765 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2767 if self.op.remove_uids is not None:
2768 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2770 if self.op.uid_pool is not None:
2771 self.cluster.uid_pool = self.op.uid_pool
2773 self.cfg.Update(self.cluster, feedback_fn)
2776 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2777 """Distribute additional files which are part of the cluster configuration.
2779 ConfigWriter takes care of distributing the config and ssconf files, but
2780 there are more files which should be distributed to all nodes. This function
2781 makes sure those are copied.
2783 @param lu: calling logical unit
2784 @param additional_nodes: list of nodes not in the config to distribute to
2787 # 1. Gather target nodes
2788 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2789 dist_nodes = lu.cfg.GetOnlineNodeList()
2790 if additional_nodes is not None:
2791 dist_nodes.extend(additional_nodes)
2792 if myself.name in dist_nodes:
2793 dist_nodes.remove(myself.name)
2795 # 2. Gather files to distribute
2796 dist_files = set([constants.ETC_HOSTS,
2797 constants.SSH_KNOWN_HOSTS_FILE,
2798 constants.RAPI_CERT_FILE,
2799 constants.RAPI_USERS_FILE,
2800 constants.CONFD_HMAC_KEY,
2801 constants.CLUSTER_DOMAIN_SECRET_FILE,
2804 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2805 for hv_name in enabled_hypervisors:
2806 hv_class = hypervisor.GetHypervisor(hv_name)
2807 dist_files.update(hv_class.GetAncillaryFiles())
2809 # 3. Perform the files upload
2810 for fname in dist_files:
2811 if os.path.exists(fname):
2812 result = lu.rpc.call_upload_file(dist_nodes, fname)
2813 for to_node, to_result in result.items():
2814 msg = to_result.fail_msg
2816 msg = ("Copy of file %s to node %s failed: %s" %
2817 (fname, to_node, msg))
2818 lu.proc.LogWarning(msg)
2821 class LURedistributeConfig(NoHooksLU):
2822 """Force the redistribution of cluster configuration.
2824 This is a very simple LU.
2830 def ExpandNames(self):
2831 self.needed_locks = {
2832 locking.LEVEL_NODE: locking.ALL_SET,
2834 self.share_locks[locking.LEVEL_NODE] = 1
2836 def Exec(self, feedback_fn):
2837 """Redistribute the configuration.
2840 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2841 _RedistributeAncillaryFiles(self)
2844 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2845 """Sleep and poll for an instance's disk to sync.
2848 if not instance.disks or (disks is not None and not disks):
2851 disks = _ExpandCheckDisks(instance, disks)
2854 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2856 node = instance.primary_node
2859 lu.cfg.SetDiskID(dev, node)
2861 # TODO: Convert to utils.Retry
2864 degr_retries = 10 # in seconds, as we sleep 1 second each time
2868 cumul_degraded = False
2869 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2870 msg = rstats.fail_msg
2872 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2875 raise errors.RemoteError("Can't contact node %s for mirror data,"
2876 " aborting." % node)
2879 rstats = rstats.payload
2881 for i, mstat in enumerate(rstats):
2883 lu.LogWarning("Can't compute data for node %s/%s",
2884 node, disks[i].iv_name)
2887 cumul_degraded = (cumul_degraded or
2888 (mstat.is_degraded and mstat.sync_percent is None))
2889 if mstat.sync_percent is not None:
2891 if mstat.estimated_time is not None:
2892 rem_time = ("%s remaining (estimated)" %
2893 utils.FormatSeconds(mstat.estimated_time))
2894 max_time = mstat.estimated_time
2896 rem_time = "no time estimate"
2897 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2898 (disks[i].iv_name, mstat.sync_percent, rem_time))
2900 # if we're done but degraded, let's do a few small retries, to
2901 # make sure we see a stable and not transient situation; therefore
2902 # we force a restart of the loop
2903 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2904 logging.info("Degraded disks found, %d retries left", degr_retries)
2912 time.sleep(min(60, max_time))
2915 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2916 return not cumul_degraded
2919 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2920 """Check that mirrors are not degraded.
2922 The ldisk parameter, if True, will change the test from the
2923 is_degraded attribute (which represents overall non-ok status for
2924 the device(s)) to the ldisk (representing the local storage status).
2927 lu.cfg.SetDiskID(dev, node)
2931 if on_primary or dev.AssembleOnSecondary():
2932 rstats = lu.rpc.call_blockdev_find(node, dev)
2933 msg = rstats.fail_msg
2935 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2937 elif not rstats.payload:
2938 lu.LogWarning("Can't find disk on node %s", node)
2942 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2944 result = result and not rstats.payload.is_degraded
2947 for child in dev.children:
2948 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
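# Sketch: for e.g. a DRBD8 device the recursion above also visits the data
# and meta children, so a degraded child marks the whole mirror inconsistent.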
2953 class LUDiagnoseOS(NoHooksLU):
2954 """Logical unit for OS diagnose/query.
2958 ("output_fields", _TListOf(_TNonEmptyString)),
2959 ("names", _TListOf(_TNonEmptyString)),
2962 _FIELDS_STATIC = utils.FieldSet()
2963 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2964 "parameters", "api_versions")
2966 def CheckArguments(self):
2968 raise errors.OpPrereqError("Selective OS query not supported",
2971 _CheckOutputFields(static=self._FIELDS_STATIC,
2972 dynamic=self._FIELDS_DYNAMIC,
2973 selected=self.op.output_fields)
2975 def ExpandNames(self):
2976 # Lock all nodes, in shared mode
2977 # Temporary removal of locks, should be reverted later
2978 # TODO: reintroduce locks when they are lighter-weight
2979 self.needed_locks = {}
2980 #self.share_locks[locking.LEVEL_NODE] = 1
2981 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2984 def _DiagnoseByOS(rlist):
2985 """Remaps a per-node return list into an a per-os per-node dictionary
2987 @param rlist: a map with node names as keys and OS objects as values
2990 @return: a dictionary with osnames as keys and as value another
2991 map, with nodes as keys and tuples of (path, status, diagnose,
2992 variants, parameters, api_versions) as values, eg::
2994 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2995 (/srv/..., False, "invalid api")],
2996 "node2": [(/srv/..., True, "", [], [])]}
3001 # we build here the list of nodes that didn't fail the RPC (at RPC
3002 # level), so that nodes with a non-responding node daemon don't
3003 # make all OSes invalid
3004 good_nodes = [node_name for node_name in rlist
3005 if not rlist[node_name].fail_msg]
3006 for node_name, nr in rlist.items():
3007 if nr.fail_msg or not nr.payload:
3009 for (name, path, status, diagnose, variants,
3010 params, api_versions) in nr.payload:
3011 if name not in all_os:
3012 # build a list of nodes for this os containing empty lists
3013 # for each node in node_list
3015 for nname in good_nodes:
3016 all_os[name][nname] = []
3017 # convert params from [name, help] to (name, help)
3018 params = [tuple(v) for v in params]
3019 all_os[name][node_name].append((path, status, diagnose,
3020 variants, params, api_versions))
3023 def Exec(self, feedback_fn):
3024 """Compute the list of OSes.
3027 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3028 node_data = self.rpc.call_os_diagnose(valid_nodes)
3029 pol = self._DiagnoseByOS(node_data)
3032 for os_name, os_data in pol.items():
3035 (variants, params, api_versions) = null_state = (set(), set(), set())
3036 for idx, osl in enumerate(os_data.values()):
3037 valid = bool(valid and osl and osl[0][1])
3039 (variants, params, api_versions) = null_state
3041 node_variants, node_params, node_api = osl[0][3:6]
3042 if idx == 0: # first entry
3043 variants = set(node_variants)
3044 params = set(node_params)
3045 api_versions = set(node_api)
3046 else: # keep consistency
3047 variants.intersection_update(node_variants)
3048 params.intersection_update(node_params)
3049 api_versions.intersection_update(node_api)
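# e.g. (variant names hypothetical) node1 reports {"squeeze", "lenny"} and
# node2 reports {"squeeze"}: after intersection only {"squeeze"} remains, so
# an OS only advertises what every node supports.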
3051 for field in self.op.output_fields:
3054 elif field == "valid":
3056 elif field == "node_status":
3057 # this is just a copy of the dict
3059 for node_name, nos_list in os_data.items():
3060 val[node_name] = nos_list
3061 elif field == "variants":
3062 val = list(variants)
3063 elif field == "parameters":
3065 elif field == "api_versions":
3066 val = list(api_versions)
3068 raise errors.ParameterError(field)
3075 class LURemoveNode(LogicalUnit):
3076 """Logical unit for removing a node.
3079 HPATH = "node-remove"
3080 HTYPE = constants.HTYPE_NODE
3081 _OP_REQP = [("node_name", _TNonEmptyString)]
3083 def BuildHooksEnv(self):
3086 This doesn't run on the target node in the pre phase as a failed
3087 node would then be impossible to remove.
3091 "OP_TARGET": self.op.node_name,
3092 "NODE_NAME": self.op.node_name,
3094 all_nodes = self.cfg.GetNodeList()
3096 all_nodes.remove(self.op.node_name)
3098 logging.warning("Node %s which is about to be removed was not found"
3099 " in the list of all nodes", self.op.node_name)
3100 return env, all_nodes, all_nodes
3102 def CheckPrereq(self):
3103 """Check prerequisites.
3106 - the node exists in the configuration
3107 - it does not have primary or secondary instances
3108 - it's not the master
3110 Any errors are signaled by raising errors.OpPrereqError.
3113 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3114 node = self.cfg.GetNodeInfo(self.op.node_name)
3115 assert node is not None
3117 instance_list = self.cfg.GetInstanceList()
3119 masternode = self.cfg.GetMasterNode()
3120 if node.name == masternode:
3121 raise errors.OpPrereqError("Node is the master node,"
3122 " you need to failover first.",
3125 for instance_name in instance_list:
3126 instance = self.cfg.GetInstanceInfo(instance_name)
3127 if node.name in instance.all_nodes:
3128 raise errors.OpPrereqError("Instance %s is still running on the node,"
3129 " please remove first." % instance_name,
3131 self.op.node_name = node.name
3134 def Exec(self, feedback_fn):
3135 """Removes the node from the cluster.
3139 logging.info("Stopping the node daemon and removing configs from node %s",
3142 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3144 # Promote nodes to master candidate as needed
3145 _AdjustCandidatePool(self, exceptions=[node.name])
3146 self.context.RemoveNode(node.name)
3148 # Run post hooks on the node before it's removed
3149 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3151 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3153 # pylint: disable-msg=W0702
3154 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3156 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3157 msg = result.fail_msg
3159 self.LogWarning("Errors encountered on the remote node while leaving"
3160 " the cluster: %s", msg)
3162 # Remove node from our /etc/hosts
3163 if self.cfg.GetClusterInfo().modify_etc_hosts:
3164 # FIXME: this should be done via an rpc call to node daemon
3165 utils.RemoveHostFromEtcHosts(node.name)
3166 _RedistributeAncillaryFiles(self)
3169 class LUQueryNodes(NoHooksLU):
3170 """Logical unit for querying nodes.
3173 # pylint: disable-msg=W0142
3175 ("output_fields", _TListOf(_TNonEmptyString)),
3176 ("names", _TListOf(_TNonEmptyString)),
3177 ("use_locking", _TBool),
3181 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3182 "master_candidate", "offline", "drained"]
3184 _FIELDS_DYNAMIC = utils.FieldSet(
3186 "mtotal", "mnode", "mfree",
3188 "ctotal", "cnodes", "csockets",
3191 _FIELDS_STATIC = utils.FieldSet(*[
3192 "pinst_cnt", "sinst_cnt",
3193 "pinst_list", "sinst_list",
3194 "pip", "sip", "tags",
3196 "role"] + _SIMPLE_FIELDS
3199 def CheckArguments(self):
3200 _CheckOutputFields(static=self._FIELDS_STATIC,
3201 dynamic=self._FIELDS_DYNAMIC,
3202 selected=self.op.output_fields)
3204 def ExpandNames(self):
3205 self.needed_locks = {}
3206 self.share_locks[locking.LEVEL_NODE] = 1
3209 self.wanted = _GetWantedNodes(self, self.op.names)
3211 self.wanted = locking.ALL_SET
3213 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3214 self.do_locking = self.do_node_query and self.op.use_locking
3216 # if we don't request only static fields, we need to lock the nodes
3217 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3219 def Exec(self, feedback_fn):
3220 """Computes the list of nodes and their attributes.
3223 all_info = self.cfg.GetAllNodesInfo()
3225 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3226 elif self.wanted != locking.ALL_SET:
3227 nodenames = self.wanted
3228 missing = set(nodenames).difference(all_info.keys())
3230 raise errors.OpExecError(
3231 "Some nodes were removed before retrieving their data: %s" % missing)
3233 nodenames = all_info.keys()
3235 nodenames = utils.NiceSort(nodenames)
3236 nodelist = [all_info[name] for name in nodenames]
3238 # begin data gathering
3240 if self.do_node_query:
3242 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3243 self.cfg.GetHypervisorType())
3244 for name in nodenames:
3245 nodeinfo = node_data[name]
3246 if not nodeinfo.fail_msg and nodeinfo.payload:
3247 nodeinfo = nodeinfo.payload
3248 fn = utils.TryConvert
3250 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3251 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3252 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3253 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3254 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3255 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3256 "bootid": nodeinfo.get('bootid', None),
3257 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3258 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3261 live_data[name] = {}
3263 live_data = dict.fromkeys(nodenames, {})
3265 node_to_primary = dict([(name, set()) for name in nodenames])
3266 node_to_secondary = dict([(name, set()) for name in nodenames])
3268 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3269 "sinst_cnt", "sinst_list"))
3270 if inst_fields & frozenset(self.op.output_fields):
3271 inst_data = self.cfg.GetAllInstancesInfo()
3273 for inst in inst_data.values():
3274 if inst.primary_node in node_to_primary:
3275 node_to_primary[inst.primary_node].add(inst.name)
3276 for secnode in inst.secondary_nodes:
3277 if secnode in node_to_secondary:
3278 node_to_secondary[secnode].add(inst.name)
3280 master_node = self.cfg.GetMasterNode()
3282 # end data gathering
3285 for node in nodelist:
3287 for field in self.op.output_fields:
3288 if field in self._SIMPLE_FIELDS:
3289 val = getattr(node, field)
3290 elif field == "pinst_list":
3291 val = list(node_to_primary[node.name])
3292 elif field == "sinst_list":
3293 val = list(node_to_secondary[node.name])
3294 elif field == "pinst_cnt":
3295 val = len(node_to_primary[node.name])
3296 elif field == "sinst_cnt":
3297 val = len(node_to_secondary[node.name])
3298 elif field == "pip":
3299 val = node.primary_ip
3300 elif field == "sip":
3301 val = node.secondary_ip
3302 elif field == "tags":
3303 val = list(node.GetTags())
3304 elif field == "master":
3305 val = node.name == master_node
3306 elif self._FIELDS_DYNAMIC.Matches(field):
3307 val = live_data[node.name].get(field, None)
3308 elif field == "role":
3309 if node.name == master_node:
3311 elif node.master_candidate:
3320 raise errors.ParameterError(field)
3321 node_output.append(val)
3322 output.append(node_output)
3327 class LUQueryNodeVolumes(NoHooksLU):
3328 """Logical unit for getting volumes on node(s).
3332 ("nodes", _TListOf(_TNonEmptyString)),
3333 ("output_fields", _TListOf(_TNonEmptyString)),
3336 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3337 _FIELDS_STATIC = utils.FieldSet("node")
3339 def CheckArguments(self):
3340 _CheckOutputFields(static=self._FIELDS_STATIC,
3341 dynamic=self._FIELDS_DYNAMIC,
3342 selected=self.op.output_fields)
3344 def ExpandNames(self):
3345 self.needed_locks = {}
3346 self.share_locks[locking.LEVEL_NODE] = 1
3347 if not self.op.nodes:
3348 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3350 self.needed_locks[locking.LEVEL_NODE] = \
3351 _GetWantedNodes(self, self.op.nodes)
3353 def Exec(self, feedback_fn):
3354 """Computes the list of nodes and their attributes.
3357 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3358 volumes = self.rpc.call_node_volumes(nodenames)
3360 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3361 in self.cfg.GetInstanceList()]
3363 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
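# Shape sketch (names hypothetical): lv_by_node keys are Instance objects,
# e.g. lv_by_node[<Instance inst1>] == {"node1": ["xenvg/lv_data"]}, used
# below to attribute each listed volume to the instance owning it.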
3366 for node in nodenames:
3367 nresult = volumes[node]
3370 msg = nresult.fail_msg
3372 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3375 node_vols = nresult.payload[:]
3376 node_vols.sort(key=lambda vol: vol['dev'])
3378 for vol in node_vols:
3380 for field in self.op.output_fields:
3383 elif field == "phys":
3387 elif field == "name":
3389 elif field == "size":
3390 val = int(float(vol['size']))
3391 elif field == "instance":
3393 if node not in lv_by_node[inst]:
3395 if vol['name'] in lv_by_node[inst][node]:
3401 raise errors.ParameterError(field)
3402 node_output.append(str(val))
3404 output.append(node_output)
3409 class LUQueryNodeStorage(NoHooksLU):
3410 """Logical unit for getting information on storage units on node(s).
3413 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3415 ("nodes", _TListOf(_TNonEmptyString)),
3416 ("storage_type", _CheckStorageType),
3417 ("output_fields", _TListOf(_TNonEmptyString)),
3419 _OP_DEFS = [("name", None)]
3422 def CheckArguments(self):
3423 _CheckOutputFields(static=self._FIELDS_STATIC,
3424 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3425 selected=self.op.output_fields)
3427 def ExpandNames(self):
3428 self.needed_locks = {}
3429 self.share_locks[locking.LEVEL_NODE] = 1
3432 self.needed_locks[locking.LEVEL_NODE] = \
3433 _GetWantedNodes(self, self.op.nodes)
3435 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3437 def Exec(self, feedback_fn):
3438 """Computes the list of nodes and their attributes.
3441 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3443 # Always get name to sort by
3444 if constants.SF_NAME in self.op.output_fields:
3445 fields = self.op.output_fields[:]
3447 fields = [constants.SF_NAME] + self.op.output_fields
3449 # Never ask for node or type as it's only known to the LU
3450 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3451 while extra in fields:
3452 fields.remove(extra)
3454 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3455 name_idx = field_idx[constants.SF_NAME]
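# e.g. fields == ["name", "size", "used"] (hypothetical) yields
# field_idx == {"name": 0, "size": 1, "used": 2}; name_idx then locates the
# column used as the row key below.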
3457 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3458 data = self.rpc.call_storage_list(self.nodes,
3459 self.op.storage_type, st_args,
3460 self.op.name, fields)
3464 for node in utils.NiceSort(self.nodes):
3465 nresult = data[node]
3469 msg = nresult.fail_msg
3471 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3474 rows = dict([(row[name_idx], row) for row in nresult.payload])
3476 for name in utils.NiceSort(rows.keys()):
3481 for field in self.op.output_fields:
3482 if field == constants.SF_NODE:
3484 elif field == constants.SF_TYPE:
3485 val = self.op.storage_type
3486 elif field in field_idx:
3487 val = row[field_idx[field]]
3489 raise errors.ParameterError(field)
3498 class LUModifyNodeStorage(NoHooksLU):
3499 """Logical unit for modifying a storage volume on a node.
3503 ("node_name", _TNonEmptyString),
3504 ("storage_type", _CheckStorageType),
3505 ("name", _TNonEmptyString),
3506 ("changes", _TDict),
3510 def CheckArguments(self):
3511 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3513 storage_type = self.op.storage_type
3516 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3518 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
3519 " modified" % storage_type,
3522 diff = set(self.op.changes.keys()) - modifiable
3524 raise errors.OpPrereqError("The following fields cannot be modified for"
3525 " storage units of type '%s': %r" %
3526 (storage_type, list(diff)),
3529 def ExpandNames(self):
3530 self.needed_locks = {
3531 locking.LEVEL_NODE: self.op.node_name,
3534 def Exec(self, feedback_fn):
3535 """Computes the list of nodes and their attributes.
3538 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3539 result = self.rpc.call_storage_modify(self.op.node_name,
3540 self.op.storage_type, st_args,
3541 self.op.name, self.op.changes)
3542 result.Raise("Failed to modify storage unit '%s' on %s" %
3543 (self.op.name, self.op.node_name))
3546 class LUAddNode(LogicalUnit):
3547 """Logical unit for adding node to the cluster.
3551 HTYPE = constants.HTYPE_NODE
3553 ("node_name", _TNonEmptyString),
3555 _OP_DEFS = [("secondary_ip", None)]
3557 def CheckArguments(self):
3558 # validate/normalize the node name
3559 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3561 def BuildHooksEnv(self):
3564 This will run on all nodes before, and on all nodes + the new node after.
3568 "OP_TARGET": self.op.node_name,
3569 "NODE_NAME": self.op.node_name,
3570 "NODE_PIP": self.op.primary_ip,
3571 "NODE_SIP": self.op.secondary_ip,
3573 nodes_0 = self.cfg.GetNodeList()
3574 nodes_1 = nodes_0 + [self.op.node_name, ]
3575 return env, nodes_0, nodes_1
3577 def CheckPrereq(self):
3578 """Check prerequisites.
3581 - the new node is not already in the config
3583 - its parameters (single/dual homed) match the cluster
3585 Any errors are signaled by raising errors.OpPrereqError.
3588 node_name = self.op.node_name
3591 dns_data = utils.GetHostInfo(node_name)
3593 node = dns_data.name
3594 primary_ip = self.op.primary_ip = dns_data.ip
3595 if self.op.secondary_ip is None:
3596 self.op.secondary_ip = primary_ip
3597 if not utils.IsValidIP(self.op.secondary_ip):
3598 raise errors.OpPrereqError("Invalid secondary IP given",
3600 secondary_ip = self.op.secondary_ip
3602 node_list = cfg.GetNodeList()
3603 if not self.op.readd and node in node_list:
3604 raise errors.OpPrereqError("Node %s is already in the configuration" %
3605 node, errors.ECODE_EXISTS)
3606 elif self.op.readd and node not in node_list:
3607 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3610 self.changed_primary_ip = False
3612 for existing_node_name in node_list:
3613 existing_node = cfg.GetNodeInfo(existing_node_name)
3615 if self.op.readd and node == existing_node_name:
3616 if existing_node.secondary_ip != secondary_ip:
3617 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3618 " address configuration as before",
3620 if existing_node.primary_ip != primary_ip:
3621 self.changed_primary_ip = True
3625 if (existing_node.primary_ip == primary_ip or
3626 existing_node.secondary_ip == primary_ip or
3627 existing_node.primary_ip == secondary_ip or
3628 existing_node.secondary_ip == secondary_ip):
3629 raise errors.OpPrereqError("New node ip address(es) conflict with"
3630 " existing node %s" % existing_node.name,
3631 errors.ECODE_NOTUNIQUE)
3633 # check that the type of the node (single versus dual homed) is the
3634 # same as for the master
3635 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3636 master_singlehomed = myself.secondary_ip == myself.primary_ip
3637 newbie_singlehomed = secondary_ip == primary_ip
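# e.g. (addresses hypothetical): a master with primary_ip == secondary_ip is
# single-homed; a new node supplying a distinct secondary ip would make the
# homing types differ, which the check below rejects.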
3638 if master_singlehomed != newbie_singlehomed:
3639 if master_singlehomed:
3640 raise errors.OpPrereqError("The master has no private ip but the"
3641 " new node has one",
3644 raise errors.OpPrereqError("The master has a private ip but the"
3645 " new node doesn't have one",
3648 # checks reachability
3649 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3650 raise errors.OpPrereqError("Node not reachable by ping",
3651 errors.ECODE_ENVIRON)
3653 if not newbie_singlehomed:
3654 # check reachability from my secondary ip to newbie's secondary ip
3655 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3656 source=myself.secondary_ip):
3657 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3658 " based ping to noded port",
3659 errors.ECODE_ENVIRON)
3666 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3669 self.new_node = self.cfg.GetNodeInfo(node)
3670 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3672 self.new_node = objects.Node(name=node,
3673 primary_ip=primary_ip,
3674 secondary_ip=secondary_ip,
3675 master_candidate=self.master_candidate,
3676 offline=False, drained=False)
3678 def Exec(self, feedback_fn):
3679 """Adds the new node to the cluster.
3682 new_node = self.new_node
3683 node = new_node.name
3685 # for re-adds, reset the offline/drained/master-candidate flags;
3686 # we need to reset here, otherwise offline would prevent RPC calls
3687 # later in the procedure; this also means that if the re-add
3688 # fails, we are left with a non-offlined, broken node
3690 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3691 self.LogInfo("Readding a node, the offline/drained flags were reset")
3692 # if we demote the node, we do cleanup later in the procedure
3693 new_node.master_candidate = self.master_candidate
3694 if self.changed_primary_ip:
3695 new_node.primary_ip = self.op.primary_ip
3697 # notify the user about any possible mc promotion
3698 if new_node.master_candidate:
3699 self.LogInfo("Node will be a master candidate")
3701 # check connectivity
3702 result = self.rpc.call_version([node])[node]
3703 result.Raise("Can't get version information from node %s" % node)
3704 if constants.PROTOCOL_VERSION == result.payload:
3705 logging.info("Communication to node %s fine, sw version %s match",
3706 node, result.payload)
3708 raise errors.OpExecError("Version mismatch: master version %s,"
3709 " node version %s" %
3710 (constants.PROTOCOL_VERSION, result.payload))
3713 if self.cfg.GetClusterInfo().modify_ssh_setup:
3714 logging.info("Copy ssh key to node %s", node)
3715 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3717 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3718 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3722 keyarray.append(utils.ReadFile(i))
3724 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3725 keyarray[2], keyarray[3], keyarray[4],
3727 result.Raise("Cannot transfer ssh keys to the new node")
3729 # Add node to our /etc/hosts, and add key to known_hosts
3730 if self.cfg.GetClusterInfo().modify_etc_hosts:
3731 # FIXME: this should be done via an rpc call to node daemon
3732 utils.AddHostToEtcHosts(new_node.name)
3734 if new_node.secondary_ip != new_node.primary_ip:
3735 result = self.rpc.call_node_has_ip_address(new_node.name,
3736 new_node.secondary_ip)
3737 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3738 prereq=True, ecode=errors.ECODE_ENVIRON)
3739 if not result.payload:
3740 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3741 " you gave (%s). Please fix and re-run this"
3742 " command." % new_node.secondary_ip)
3744 node_verify_list = [self.cfg.GetMasterNode()]
3745 node_verify_param = {
3746 constants.NV_NODELIST: [node],
3747 # TODO: do a node-net-test as well?
3750 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3751 self.cfg.GetClusterName())
3752 for verifier in node_verify_list:
3753 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3754 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3756 for failed in nl_payload:
3757 feedback_fn("ssh/hostname verification failed"
3758 " (checking from %s): %s" %
3759 (verifier, nl_payload[failed]))
3760 raise errors.OpExecError("ssh/hostname verification failed.")
3763 _RedistributeAncillaryFiles(self)
3764 self.context.ReaddNode(new_node)
3765 # make sure we redistribute the config
3766 self.cfg.Update(new_node, feedback_fn)
3767 # and make sure the new node will not have old files around
3768 if not new_node.master_candidate:
3769 result = self.rpc.call_node_demote_from_mc(new_node.name)
3770 msg = result.fail_msg
3772 self.LogWarning("Node failed to demote itself from master"
3773 " candidate status: %s" % msg)
3775 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3776 self.context.AddNode(new_node, self.proc.GetECId())
3779 class LUSetNodeParams(LogicalUnit):
3780 """Modifies the parameters of a node.
3783 HPATH = "node-modify"
3784 HTYPE = constants.HTYPE_NODE
3785 _OP_REQP = [("node_name", _TNonEmptyString)]
3788 def CheckArguments(self):
3789 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3790 _CheckBooleanOpField(self.op, 'master_candidate')
3791 _CheckBooleanOpField(self.op, 'offline')
3792 _CheckBooleanOpField(self.op, 'drained')
3793 _CheckBooleanOpField(self.op, 'auto_promote')
3794 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3795 if all_mods.count(None) == 3:
3796 raise errors.OpPrereqError("Please pass at least one modification",
3798 if all_mods.count(True) > 1:
3799 raise errors.OpPrereqError("Can't set the node into more than one"
3800 " state at the same time",
3803 # Boolean value that tells us whether we're offlining or draining the node
3804 self.offline_or_drain = (self.op.offline == True or
3805 self.op.drained == True)
3806 self.deoffline_or_drain = (self.op.offline == False or
3807 self.op.drained == False)
3808 self.might_demote = (self.op.master_candidate == False or
3809 self.offline_or_drain)
3811 self.lock_all = self.op.auto_promote and self.might_demote
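# Sketch of the reasoning: offlining/draining a master candidate may demote
# it; with auto_promote set we therefore lock all nodes so that the candidate
# pool can be adjusted (a replacement promoted) in the same operation.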
3814 def ExpandNames(self):
3816 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3818 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3820 def BuildHooksEnv(self):
3823 This runs on the master node.
3827 "OP_TARGET": self.op.node_name,
3828 "MASTER_CANDIDATE": str(self.op.master_candidate),
3829 "OFFLINE": str(self.op.offline),
3830 "DRAINED": str(self.op.drained),
3832 nl = [self.cfg.GetMasterNode(),
3836 def CheckPrereq(self):
3837 """Check prerequisites.
3839 This only checks the instance list against the existing names.
3842 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3844 if (self.op.master_candidate is not None or
3845 self.op.drained is not None or
3846 self.op.offline is not None):
3847 # we can't change the master's node flags
3848 if self.op.node_name == self.cfg.GetMasterNode():
3849 raise errors.OpPrereqError("The master role can be changed"
3850 " only via masterfailover",
3854 if node.master_candidate and self.might_demote and not self.lock_all:
3855 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3856 # check if, after removing the current node, we're missing master candidates
3858 (mc_remaining, mc_should, _) = \
3859 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3860 if mc_remaining < mc_should:
3861 raise errors.OpPrereqError("Not enough master candidates, please"
3862 " pass auto_promote to allow promotion",
3865 if (self.op.master_candidate == True and
3866 ((node.offline and not self.op.offline == False) or
3867 (node.drained and not self.op.drained == False))):
3868 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3869 " to master_candidate" % node.name,
3872 # If we're being deofflined/drained, we'll MC ourself if needed
3873 if (self.deoffline_or_drain and not self.offline_or_drain and not
3874 self.op.master_candidate == True and not node.master_candidate):
3875 self.op.master_candidate = _DecideSelfPromotion(self)
3876 if self.op.master_candidate:
3877 self.LogInfo("Autopromoting node to master candidate")
3881 def Exec(self, feedback_fn):
3890 if self.op.offline is not None:
3891 node.offline = self.op.offline
3892 result.append(("offline", str(self.op.offline)))
3893 if self.op.offline == True:
3894 if node.master_candidate:
3895 node.master_candidate = False
3897 result.append(("master_candidate", "auto-demotion due to offline"))
3899 node.drained = False
3900 result.append(("drained", "clear drained status due to offline"))
3902 if self.op.master_candidate is not None:
3903 node.master_candidate = self.op.master_candidate
3905 result.append(("master_candidate", str(self.op.master_candidate)))
3906 if self.op.master_candidate == False:
3907 rrc = self.rpc.call_node_demote_from_mc(node.name)
3910 self.LogWarning("Node failed to demote itself: %s" % msg)
3912 if self.op.drained is not None:
3913 node.drained = self.op.drained
3914 result.append(("drained", str(self.op.drained)))
3915 if self.op.drained == True:
3916 if node.master_candidate:
3917 node.master_candidate = False
3919 result.append(("master_candidate", "auto-demotion due to drain"))
3920 rrc = self.rpc.call_node_demote_from_mc(node.name)
3923 self.LogWarning("Node failed to demote itself: %s" % msg)
3925 node.offline = False
3926 result.append(("offline", "clear offline status due to drain"))
3928 # we locked all nodes, so we adjust the candidate pool before updating this node
3930 _AdjustCandidatePool(self, [node.name])
3932 # this will trigger configuration file update, if needed
3933 self.cfg.Update(node, feedback_fn)
3935 # this will trigger job queue propagation or cleanup
3937 self.context.ReaddNode(node)
3942 class LUPowercycleNode(NoHooksLU):
3943 """Powercycles a node.
3947 ("node_name", _TNonEmptyString),
3952 def CheckArguments(self):
3953 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3954 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3955 raise errors.OpPrereqError("The node is the master and the force"
3956 " parameter was not set",
3959 def ExpandNames(self):
3960 """Locking for PowercycleNode.
3962 This is a last-resort option and shouldn't block on other
3963 jobs. Therefore, we grab no locks.
3966 self.needed_locks = {}
3968 def Exec(self, feedback_fn):
3972 result = self.rpc.call_node_powercycle(self.op.node_name,
3973 self.cfg.GetHypervisorType())
3974 result.Raise("Failed to schedule the reboot")
3975 return result.payload
3978 class LUQueryClusterInfo(NoHooksLU):
3979 """Query cluster configuration.
3985 def ExpandNames(self):
3986 self.needed_locks = {}
3988 def Exec(self, feedback_fn):
3989 """Return cluster config.
3992 cluster = self.cfg.GetClusterInfo()
3995 # Filter just for enabled hypervisors
3996 for os_name, hv_dict in cluster.os_hvp.items():
3997 os_hvp[os_name] = {}
3998 for hv_name, hv_params in hv_dict.items():
3999 if hv_name in cluster.enabled_hypervisors:
4000 os_hvp[os_name][hv_name] = hv_params
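# A small illustration of the filtering above (hypothetical values): with
#   cluster.os_hvp == {"debootstrap": {"xen-pvm": {...}, "kvm": {...}}}
# and cluster.enabled_hypervisors == ["xen-pvm"], the result is
#   os_hvp == {"debootstrap": {"xen-pvm": {...}}}
# i.e. per-OS overrides for disabled hypervisors are not reported.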
4003 "software_version": constants.RELEASE_VERSION,
4004 "protocol_version": constants.PROTOCOL_VERSION,
4005 "config_version": constants.CONFIG_VERSION,
4006 "os_api_version": max(constants.OS_API_VERSIONS),
4007 "export_version": constants.EXPORT_VERSION,
4008 "architecture": (platform.architecture()[0], platform.machine()),
4009 "name": cluster.cluster_name,
4010 "master": cluster.master_node,
4011 "default_hypervisor": cluster.enabled_hypervisors[0],
4012 "enabled_hypervisors": cluster.enabled_hypervisors,
4013 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4014 for hypervisor_name in cluster.enabled_hypervisors]),
4016 "beparams": cluster.beparams,
4017 "osparams": cluster.osparams,
4018 "nicparams": cluster.nicparams,
4019 "candidate_pool_size": cluster.candidate_pool_size,
4020 "master_netdev": cluster.master_netdev,
4021 "volume_group_name": cluster.volume_group_name,
4022 "file_storage_dir": cluster.file_storage_dir,
4023 "maintain_node_health": cluster.maintain_node_health,
4024 "ctime": cluster.ctime,
4025 "mtime": cluster.mtime,
4026 "uuid": cluster.uuid,
4027 "tags": list(cluster.GetTags()),
4028 "uid_pool": cluster.uid_pool,
4034 class LUQueryConfigValues(NoHooksLU):
4035 """Return configuration values.
4040 _FIELDS_DYNAMIC = utils.FieldSet()
4041 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag", "watcher_pause")
4044 def CheckArguments(self):
4045 _CheckOutputFields(static=self._FIELDS_STATIC,
4046 dynamic=self._FIELDS_DYNAMIC,
4047 selected=self.op.output_fields)
4049 def ExpandNames(self):
4050 self.needed_locks = {}
4052 def Exec(self, feedback_fn):
4053 """Dump a representation of the cluster config to the standard output.
4057 for field in self.op.output_fields:
4058 if field == "cluster_name":
4059 entry = self.cfg.GetClusterName()
4060 elif field == "master_node":
4061 entry = self.cfg.GetMasterNode()
4062 elif field == "drain_flag":
4063 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4064 elif field == "watcher_pause":
4065 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4067 raise errors.ParameterError(field)
4068 values.append(entry)
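# Example of the field dispatch above (hypothetical query): requesting
# output_fields == ["cluster_name", "drain_flag"] yields the values in the
# same order, e.g. ["cluster1.example.com", False]; an unknown field raises
# errors.ParameterError rather than being silently skipped.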
4072 class LUActivateInstanceDisks(NoHooksLU):
4073 """Bring up an instance's disks.
4076 _OP_REQP = [("instance_name", _TNonEmptyString)]
4077 _OP_DEFS = [("ignore_size", False)]
4080 def ExpandNames(self):
4081 self._ExpandAndLockInstance()
4082 self.needed_locks[locking.LEVEL_NODE] = []
4083 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4085 def DeclareLocks(self, level):
4086 if level == locking.LEVEL_NODE:
4087 self._LockInstancesNodes()
4089 def CheckPrereq(self):
4090 """Check prerequisites.
4092 This checks that the instance is in the cluster.
4095 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096 assert self.instance is not None, \
4097 "Cannot retrieve locked instance %s" % self.op.instance_name
4098 _CheckNodeOnline(self, self.instance.primary_node)
4100 def Exec(self, feedback_fn):
4101 """Activate the disks.
4104 disks_ok, disks_info = \
4105 _AssembleInstanceDisks(self, self.instance,
4106 ignore_size=self.op.ignore_size)
4108 raise errors.OpExecError("Cannot activate block devices")
4113 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4115 """Prepare the block devices for an instance.
4117 This sets up the block devices on all nodes.
4119 @type lu: L{LogicalUnit}
4120 @param lu: the logical unit on whose behalf we execute
4121 @type instance: L{objects.Instance}
4122 @param instance: the instance for whose disks we assemble
4123 @type disks: list of L{objects.Disk} or None
4124 @param disks: which disks to assemble (or all, if None)
4125 @type ignore_secondaries: boolean
4126 @param ignore_secondaries: if true, errors on secondary nodes
4127 won't result in an error return from the function
4128 @type ignore_size: boolean
4129 @param ignore_size: if true, the current known size of the disk
4130 will not be used during the disk activation, useful for cases
4131 when the size is wrong
4132 @return: a (disks_ok, device_info) tuple; device_info is a list of
4133 (host, instance_visible_name, node_visible_name) triples
4134 with the mapping from node devices to instance devices
4139 iname = instance.name
4140 disks = _ExpandCheckDisks(instance, disks)
4142 # With the two-pass mechanism we try to reduce the window of
4143 # opportunity for the race condition of switching DRBD to primary
4144 # before handshaking occurred, but we do not eliminate it
4146 # The proper fix would be to wait (with some limits) until the
4147 # connection has been made and drbd transitions from WFConnection
4148 # into any other network-connected state (Connected, SyncTarget, SyncSource, etc.)
4151 # 1st pass, assemble on all nodes in secondary mode
4152 for inst_disk in disks:
4153 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4155 node_disk = node_disk.Copy()
4156 node_disk.UnsetSize()
4157 lu.cfg.SetDiskID(node_disk, node)
4158 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4159 msg = result.fail_msg
4161 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4162 " (is_primary=False, pass=1): %s",
4163 inst_disk.iv_name, node, msg)
4164 if not ignore_secondaries:
4167 # FIXME: race condition on drbd migration to primary
4169 # 2nd pass, do only the primary node
4170 for inst_disk in disks:
4173 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4174 if node != instance.primary_node:
4177 node_disk = node_disk.Copy()
4178 node_disk.UnsetSize()
4179 lu.cfg.SetDiskID(node_disk, node)
4180 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4181 msg = result.fail_msg
4183 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4184 " (is_primary=True, pass=2): %s",
4185 inst_disk.iv_name, node, msg)
4188 dev_path = result.payload
4190 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4192 # leave the disks configured for the primary node
4193 # this is a workaround that would be fixed better by
4194 # improving the logical/physical id handling
4196 lu.cfg.SetDiskID(disk, instance.primary_node)
4198 return disks_ok, device_info
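# A minimal usage sketch for the helper above ("lu" and "instance" are
# assumed to come from a running LU, as in LUActivateInstanceDisks):
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s is visible on %s as %s" % (iv_name, node, dev_path))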
4201 def _StartInstanceDisks(lu, instance, force):
4202 """Start the disks of an instance.
4205 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4206 ignore_secondaries=force)
4208 _ShutdownInstanceDisks(lu, instance)
4209 if force is not None and not force:
4210 lu.proc.LogWarning("", hint="If the message above refers to a secondary node,"
4212 " you can retry the operation using '--force'.")
4213 raise errors.OpExecError("Disk consistency error")
4216 class LUDeactivateInstanceDisks(NoHooksLU):
4217 """Shutdown an instance's disks.
4220 _OP_REQP = [("instance_name", _TNonEmptyString)]
4223 def ExpandNames(self):
4224 self._ExpandAndLockInstance()
4225 self.needed_locks[locking.LEVEL_NODE] = []
4226 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4228 def DeclareLocks(self, level):
4229 if level == locking.LEVEL_NODE:
4230 self._LockInstancesNodes()
4232 def CheckPrereq(self):
4233 """Check prerequisites.
4235 This checks that the instance is in the cluster.
4238 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4239 assert self.instance is not None, \
4240 "Cannot retrieve locked instance %s" % self.op.instance_name
4242 def Exec(self, feedback_fn):
4243 """Deactivate the disks
4246 instance = self.instance
4247 _SafeShutdownInstanceDisks(self, instance)
4250 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4251 """Shutdown block devices of an instance.
4253 This function checks that the instance is not running before calling
4254 _ShutdownInstanceDisks.
4257 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4258 _ShutdownInstanceDisks(lu, instance, disks=disks)
4261 def _ExpandCheckDisks(instance, disks):
4262 """Return the instance disks selected by the disks list
4264 @type disks: list of L{objects.Disk} or None
4265 @param disks: selected disks
4266 @rtype: list of L{objects.Disk}
4267 @return: selected instance disks to act on
4271 return instance.disks
4273 if not set(disks).issubset(instance.disks):
4274 raise errors.ProgrammerError("Can only act on disks belonging to the target instance")
4279 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4280 """Shutdown block devices of an instance.
4282 This does the shutdown on all nodes of the instance.
4284 If ignore_primary is false, errors on the primary node are counted as failures and make the function return False.
4289 disks = _ExpandCheckDisks(instance, disks)
4292 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4293 lu.cfg.SetDiskID(top_disk, node)
4294 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4295 msg = result.fail_msg
4297 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4298 disk.iv_name, node, msg)
4299 if not ignore_primary or node != instance.primary_node:
4304 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4305 """Checks if a node has enough free memory.
4307 This function checks if a given node has the needed amount of free
4308 memory. In case the node has less memory or we cannot get the
4309 information from the node, this function raises an OpPrereqError exception.
4312 @type lu: C{LogicalUnit}
4313 @param lu: a logical unit from which we get configuration data
4315 @param node: the node to check
4316 @type reason: C{str}
4317 @param reason: string to use in the error message
4318 @type requested: C{int}
4319 @param requested: the amount of memory in MiB to check for
4320 @type hypervisor_name: C{str}
4321 @param hypervisor_name: the hypervisor to ask for memory stats
4322 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4323 we cannot check the node
4326 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4327 nodeinfo[node].Raise("Can't get data from node %s" % node,
4328 prereq=True, ecode=errors.ECODE_ENVIRON)
4329 free_mem = nodeinfo[node].payload.get('memory_free', None)
4330 if not isinstance(free_mem, int):
4331 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4332 " was '%s'" % (node, free_mem),
4333 errors.ECODE_ENVIRON)
4334 if requested > free_mem:
4335 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4336 " needed %s MiB, available %s MiB" %
4337 (node, reason, requested, free_mem),
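# Illustrative call (hypothetical sizes), mirroring the start-instance path
# further below:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        512, instance.hypervisor)
#
# The helper either returns quietly or raises OpPrereqError, so callers do
# not need to inspect a result.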
4341 def _CheckNodesFreeDisk(lu, nodenames, requested):
4342 """Checks if nodes have enough free disk space in the default VG.
4344 This function checks if all given nodes have the needed amount of
4345 free disk. In case any node has less disk or we cannot get the
4346 information from the node, this function raises an OpPrereqError exception.
4349 @type lu: C{LogicalUnit}
4350 @param lu: a logical unit from which we get configuration data
4351 @type nodenames: C{list}
4352 @param nodenames: the list of node names to check
4353 @type requested: C{int}
4354 @param requested: the amount of disk in MiB to check for
4355 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4356 we cannot check the node
4359 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4360 lu.cfg.GetHypervisorType())
4361 for node in nodenames:
4362 info = nodeinfo[node]
4363 info.Raise("Cannot get current information from node %s" % node,
4364 prereq=True, ecode=errors.ECODE_ENVIRON)
4365 vg_free = info.payload.get("vg_free", None)
4366 if not isinstance(vg_free, int):
4367 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4368 " result was '%s'" % (node, vg_free),
4369 errors.ECODE_ENVIRON)
4370 if requested > vg_free:
4371 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4372 " required %d MiB, available %d MiB" %
4373 (node, requested, vg_free),
4377 class LUStartupInstance(LogicalUnit):
4378 """Starts an instance.
4381 HPATH = "instance-start"
4382 HTYPE = constants.HTYPE_INSTANCE
4384 ("instance_name", _TNonEmptyString),
4386 ("beparams", _TDict),
4387 ("hvparams", _TDict),
4390 ("beparams", _EmptyDict),
4391 ("hvparams", _EmptyDict),
4395 def CheckArguments(self):
4397 if self.op.beparams:
4398 # fill the beparams dict
4399 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4401 def ExpandNames(self):
4402 self._ExpandAndLockInstance()
4404 def BuildHooksEnv(self):
4407 This runs on master, primary and secondary nodes of the instance.
4411 "FORCE": self.op.force,
4413 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4414 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4417 def CheckPrereq(self):
4418 """Check prerequisites.
4420 This checks that the instance is in the cluster.
4423 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4424 assert self.instance is not None, \
4425 "Cannot retrieve locked instance %s" % self.op.instance_name
4428 if self.op.hvparams:
4429 # check hypervisor parameter syntax (locally)
4430 cluster = self.cfg.GetClusterInfo()
4431 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4432 filled_hvp = cluster.FillHV(instance)
4433 filled_hvp.update(self.op.hvparams)
4434 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4435 hv_type.CheckParameterSyntax(filled_hvp)
4436 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
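# Sketch of the parameter layering above (hypothetical values): with a
# cluster-level default of {"kernel_path": "/boot/vmlinuz-2.6-xenU"} and an
# opcode carrying hvparams={"root_path": "/dev/sda1"}, filled_hvp merges to
# {"kernel_path": "/boot/vmlinuz-2.6-xenU", "root_path": "/dev/sda1"}; the
# override is validated here but only used for this start, the instance
# definition stored in the configuration is not modified.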
4438 _CheckNodeOnline(self, instance.primary_node)
4440 bep = self.cfg.GetClusterInfo().FillBE(instance)
4441 # check bridge existence
4442 _CheckInstanceBridgesExist(self, instance)
4444 remote_info = self.rpc.call_instance_info(instance.primary_node,
4446 instance.hypervisor)
4447 remote_info.Raise("Error checking node %s" % instance.primary_node,
4448 prereq=True, ecode=errors.ECODE_ENVIRON)
4449 if not remote_info.payload: # not running already
4450 _CheckNodeFreeMemory(self, instance.primary_node,
4451 "starting instance %s" % instance.name,
4452 bep[constants.BE_MEMORY], instance.hypervisor)
4454 def Exec(self, feedback_fn):
4455 """Start the instance.
4458 instance = self.instance
4459 force = self.op.force
4461 self.cfg.MarkInstanceUp(instance.name)
4463 node_current = instance.primary_node
4465 _StartInstanceDisks(self, instance, force)
4467 result = self.rpc.call_instance_start(node_current, instance,
4468 self.op.hvparams, self.op.beparams)
4469 msg = result.fail_msg
4471 _ShutdownInstanceDisks(self, instance)
4472 raise errors.OpExecError("Could not start instance: %s" % msg)
4475 class LURebootInstance(LogicalUnit):
4476 """Reboot an instance.
4479 HPATH = "instance-reboot"
4480 HTYPE = constants.HTYPE_INSTANCE
4482 ("instance_name", _TNonEmptyString),
4483 ("ignore_secondaries", _TBool),
4484 ("reboot_type", _TElemOf(constants.REBOOT_TYPES)),
4486 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4489 def ExpandNames(self):
4490 self._ExpandAndLockInstance()
4492 def BuildHooksEnv(self):
4495 This runs on master, primary and secondary nodes of the instance.
4499 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4500 "REBOOT_TYPE": self.op.reboot_type,
4501 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4503 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4507 def CheckPrereq(self):
4508 """Check prerequisites.
4510 This checks that the instance is in the cluster.
4513 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4514 assert self.instance is not None, \
4515 "Cannot retrieve locked instance %s" % self.op.instance_name
4517 _CheckNodeOnline(self, instance.primary_node)
4519 # check bridges existence
4520 _CheckInstanceBridgesExist(self, instance)
4522 def Exec(self, feedback_fn):
4523 """Reboot the instance.
4526 instance = self.instance
4527 ignore_secondaries = self.op.ignore_secondaries
4528 reboot_type = self.op.reboot_type
4530 node_current = instance.primary_node
4532 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4533 constants.INSTANCE_REBOOT_HARD]:
4534 for disk in instance.disks:
4535 self.cfg.SetDiskID(disk, node_current)
4536 result = self.rpc.call_instance_reboot(node_current, instance,
4538 self.op.shutdown_timeout)
4539 result.Raise("Could not reboot instance")
4541 result = self.rpc.call_instance_shutdown(node_current, instance,
4542 self.op.shutdown_timeout)
4543 result.Raise("Could not shutdown instance for full reboot")
4544 _ShutdownInstanceDisks(self, instance)
4545 _StartInstanceDisks(self, instance, ignore_secondaries)
4546 result = self.rpc.call_instance_start(node_current, instance, None, None)
4547 msg = result.fail_msg
4549 _ShutdownInstanceDisks(self, instance)
4550 raise errors.OpExecError("Could not start instance for"
4551 " full reboot: %s" % msg)
4553 self.cfg.MarkInstanceUp(instance.name)
4556 class LUShutdownInstance(LogicalUnit):
4557 """Shutdown an instance.
4560 HPATH = "instance-stop"
4561 HTYPE = constants.HTYPE_INSTANCE
4562 _OP_REQP = [("instance_name", _TNonEmptyString)]
4563 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4566 def ExpandNames(self):
4567 self._ExpandAndLockInstance()
4569 def BuildHooksEnv(self):
4572 This runs on master, primary and secondary nodes of the instance.
4575 env = _BuildInstanceHookEnvByObject(self, self.instance)
4576 env["TIMEOUT"] = self.op.timeout
4577 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4580 def CheckPrereq(self):
4581 """Check prerequisites.
4583 This checks that the instance is in the cluster.
4586 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4587 assert self.instance is not None, \
4588 "Cannot retrieve locked instance %s" % self.op.instance_name
4589 _CheckNodeOnline(self, self.instance.primary_node)
4591 def Exec(self, feedback_fn):
4592 """Shutdown the instance.
4595 instance = self.instance
4596 node_current = instance.primary_node
4597 timeout = self.op.timeout
4598 self.cfg.MarkInstanceDown(instance.name)
4599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4600 msg = result.fail_msg
4602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4604 _ShutdownInstanceDisks(self, instance)
4607 class LUReinstallInstance(LogicalUnit):
4608 """Reinstall an instance.
4611 HPATH = "instance-reinstall"
4612 HTYPE = constants.HTYPE_INSTANCE
4613 _OP_REQP = [("instance_name", _TNonEmptyString)]
4616 ("force_variant", False),
4620 def ExpandNames(self):
4621 self._ExpandAndLockInstance()
4623 def BuildHooksEnv(self):
4626 This runs on master, primary and secondary nodes of the instance.
4629 env = _BuildInstanceHookEnvByObject(self, self.instance)
4630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4633 def CheckPrereq(self):
4634 """Check prerequisites.
4636 This checks that the instance is in the cluster and is not running.
4639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4640 assert instance is not None, \
4641 "Cannot retrieve locked instance %s" % self.op.instance_name
4642 _CheckNodeOnline(self, instance.primary_node)
4644 if instance.disk_template == constants.DT_DISKLESS:
4645 raise errors.OpPrereqError("Instance '%s' has no disks" %
4646 self.op.instance_name,
4648 _CheckInstanceDown(self, instance, "cannot reinstall")
4650 if self.op.os_type is not None:
4652 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4653 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4655 self.instance = instance
4657 def Exec(self, feedback_fn):
4658 """Reinstall the instance.
4661 inst = self.instance
4663 if self.op.os_type is not None:
4664 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4665 inst.os = self.op.os_type
4666 self.cfg.Update(inst, feedback_fn)
4668 _StartInstanceDisks(self, inst, None)
4670 feedback_fn("Running the instance OS create scripts...")
4671 # FIXME: pass debug option from opcode to backend
4672 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4673 self.op.debug_level)
4674 result.Raise("Could not install OS for instance %s on node %s" %
4675 (inst.name, inst.primary_node))
4677 _ShutdownInstanceDisks(self, inst)
4680 class LURecreateInstanceDisks(LogicalUnit):
4681 """Recreate an instance's missing disks.
4684 HPATH = "instance-recreate-disks"
4685 HTYPE = constants.HTYPE_INSTANCE
4687 ("instance_name", _TNonEmptyString),
4688 ("disks", _TListOf(_TPositiveInt)),
4692 def ExpandNames(self):
4693 self._ExpandAndLockInstance()
4695 def BuildHooksEnv(self):
4698 This runs on master, primary and secondary nodes of the instance.
4701 env = _BuildInstanceHookEnvByObject(self, self.instance)
4702 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4705 def CheckPrereq(self):
4706 """Check prerequisites.
4708 This checks that the instance is in the cluster and is not running.
4711 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4712 assert instance is not None, \
4713 "Cannot retrieve locked instance %s" % self.op.instance_name
4714 _CheckNodeOnline(self, instance.primary_node)
4716 if instance.disk_template == constants.DT_DISKLESS:
4717 raise errors.OpPrereqError("Instance '%s' has no disks" %
4718 self.op.instance_name, errors.ECODE_INVAL)
4719 _CheckInstanceDown(self, instance, "cannot recreate disks")
4721 if not self.op.disks:
4722 self.op.disks = range(len(instance.disks))
4724 for idx in self.op.disks:
4725 if idx >= len(instance.disks):
4726 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
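# e.g. for a hypothetical two-disk instance: an empty disks list expands to
# range(2) == [0, 1], i.e. "recreate everything", while disks=[2] fails the
# bounds check above with "Invalid disk index passed '2'"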
4729 self.instance = instance
4731 def Exec(self, feedback_fn):
4732 """Recreate the disks.
4736 for idx, _ in enumerate(self.instance.disks):
4737 if idx not in self.op.disks: # disk idx has not been passed in
4741 _CreateDisks(self, self.instance, to_skip=to_skip)
4744 class LURenameInstance(LogicalUnit):
4745 """Rename an instance.
4748 HPATH = "instance-rename"
4749 HTYPE = constants.HTYPE_INSTANCE
4751 ("instance_name", _TNonEmptyString),
4752 ("new_name", _TNonEmptyString),
4754 _OP_DEFS = [("ignore_ip", False)]
4756 def BuildHooksEnv(self):
4759 This runs on master, primary and secondary nodes of the instance.
4762 env = _BuildInstanceHookEnvByObject(self, self.instance)
4763 env["INSTANCE_NEW_NAME"] = self.op.new_name
4764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4767 def CheckPrereq(self):
4768 """Check prerequisites.
4770 This checks that the instance is in the cluster and is not running.
4773 self.op.instance_name = _ExpandInstanceName(self.cfg,
4774 self.op.instance_name)
4775 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4776 assert instance is not None
4777 _CheckNodeOnline(self, instance.primary_node)
4778 _CheckInstanceDown(self, instance, "cannot rename")
4779 self.instance = instance
4781 # new name verification
4782 name_info = utils.GetHostInfo(self.op.new_name)
4784 self.op.new_name = new_name = name_info.name
4785 instance_list = self.cfg.GetInstanceList()
4786 if new_name in instance_list:
4787 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4788 new_name, errors.ECODE_EXISTS)
4790 if not self.op.ignore_ip:
4791 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4792 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4793 (name_info.ip, new_name),
4794 errors.ECODE_NOTUNIQUE)
4796 def Exec(self, feedback_fn):
4797 """Reinstall the instance.
4800 inst = self.instance
4801 old_name = inst.name
4803 if inst.disk_template == constants.DT_FILE:
4804 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4806 self.cfg.RenameInstance(inst.name, self.op.new_name)
4807 # Change the instance lock. This is definitely safe while we hold the BGL
4808 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4809 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4811 # re-read the instance from the configuration after rename
4812 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4814 if inst.disk_template == constants.DT_FILE:
4815 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4816 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4817 old_file_storage_dir,
4818 new_file_storage_dir)
4819 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4820 " (but the instance has been renamed in Ganeti)" %
4821 (inst.primary_node, old_file_storage_dir,
4822 new_file_storage_dir))
4824 _StartInstanceDisks(self, inst, None)
4826 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4827 old_name, self.op.debug_level)
4828 msg = result.fail_msg
4830 msg = ("Could not run OS rename script for instance %s on node %s"
4831 " (but the instance has been renamed in Ganeti): %s" %
4832 (inst.name, inst.primary_node, msg))
4833 self.proc.LogWarning(msg)
4835 _ShutdownInstanceDisks(self, inst)
4838 class LURemoveInstance(LogicalUnit):
4839 """Remove an instance.
4842 HPATH = "instance-remove"
4843 HTYPE = constants.HTYPE_INSTANCE
4845 ("instance_name", _TNonEmptyString),
4846 ("ignore_failures", _TBool),
4848 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4851 def ExpandNames(self):
4852 self._ExpandAndLockInstance()
4853 self.needed_locks[locking.LEVEL_NODE] = []
4854 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4856 def DeclareLocks(self, level):
4857 if level == locking.LEVEL_NODE:
4858 self._LockInstancesNodes()
4860 def BuildHooksEnv(self):
4863 This runs on master, primary and secondary nodes of the instance.
4866 env = _BuildInstanceHookEnvByObject(self, self.instance)
4867 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4868 nl = [self.cfg.GetMasterNode()]
4869 nl_post = list(self.instance.all_nodes) + nl
4870 return env, nl, nl_post
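# Note on the two node lists above: the pre-hooks (nl) run only on the
# master, while the post-hooks (nl_post) also run on the (by then former)
# instance nodes; e.g. for a hypothetical DRBD instance on node1/node2 in a
# cluster whose master is node0, nl == [node0] and
# nl_post == [node1, node2, node0].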
4872 def CheckPrereq(self):
4873 """Check prerequisites.
4875 This checks that the instance is in the cluster.
4878 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4879 assert self.instance is not None, \
4880 "Cannot retrieve locked instance %s" % self.op.instance_name
4882 def Exec(self, feedback_fn):
4883 """Remove the instance.
4886 instance = self.instance
4887 logging.info("Shutting down instance %s on node %s",
4888 instance.name, instance.primary_node)
4890 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4891 self.op.shutdown_timeout)
4892 msg = result.fail_msg
4894 if self.op.ignore_failures:
4895 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4897 raise errors.OpExecError("Could not shutdown instance %s on"
4899 (instance.name, instance.primary_node, msg))
4901 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4904 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4905 """Utility function to remove an instance.
4908 logging.info("Removing block devices for instance %s", instance.name)
4910 if not _RemoveDisks(lu, instance):
4911 if not ignore_failures:
4912 raise errors.OpExecError("Can't remove instance's disks")
4913 feedback_fn("Warning: can't remove instance's disks")
4915 logging.info("Removing instance %s out of cluster config", instance.name)
4917 lu.cfg.RemoveInstance(instance.name)
4919 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4920 "Instance lock removal conflict"
4922 # Remove lock for the instance
4923 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4926 class LUQueryInstances(NoHooksLU):
4927 """Logical unit for querying instances.
4930 # pylint: disable-msg=W0142
4932 ("output_fields", _TListOf(_TNonEmptyString)),
4933 ("names", _TListOf(_TNonEmptyString)),
4934 ("use_locking", _TBool),
4937 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4938 "serial_no", "ctime", "mtime", "uuid"]
4939 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4941 "disk_template", "ip", "mac", "bridge",
4942 "nic_mode", "nic_link",
4943 "sda_size", "sdb_size", "vcpus", "tags",
4944 "network_port", "beparams",
4945 r"(disk)\.(size)/([0-9]+)",
4946 r"(disk)\.(sizes)", "disk_usage",
4947 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4948 r"(nic)\.(bridge)/([0-9]+)",
4949 r"(nic)\.(macs|ips|modes|links|bridges)",
4950 r"(disk|nic)\.(count)",
4952 ] + _SIMPLE_FIELDS +
4954 ["hv/%s" % name for name in constants.HVS_PARAMETERS
4955 if name not in constants.HVC_GLOBALS] +
4957 ["be/%s" % name for name in constants.BES_PARAMETERS])
4958 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4961 def CheckArguments(self):
4962 _CheckOutputFields(static=self._FIELDS_STATIC,
4963 dynamic=self._FIELDS_DYNAMIC,
4964 selected=self.op.output_fields)
4966 def ExpandNames(self):
4967 self.needed_locks = {}
4968 self.share_locks[locking.LEVEL_INSTANCE] = 1
4969 self.share_locks[locking.LEVEL_NODE] = 1
4972 self.wanted = _GetWantedInstances(self, self.op.names)
4974 self.wanted = locking.ALL_SET
4976 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4977 self.do_locking = self.do_node_query and self.op.use_locking
4979 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4980 self.needed_locks[locking.LEVEL_NODE] = []
4981 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
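# Example of the locking decision above (hypothetical queries): asking only
# for static fields, e.g. output_fields=["name", "os"], leaves do_node_query
# and therefore do_locking False, so the data comes purely from the config;
# adding a dynamic field such as "oper_state" forces a live node query and,
# together with use_locking, makes the LU lock the instances and their nodes.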
4983 def DeclareLocks(self, level):
4984 if level == locking.LEVEL_NODE and self.do_locking:
4985 self._LockInstancesNodes()
4987 def Exec(self, feedback_fn):
4988 """Computes the list of nodes and their attributes.
4991 # pylint: disable-msg=R0912
4992 # way too many branches here
4993 all_info = self.cfg.GetAllInstancesInfo()
4994 if self.wanted == locking.ALL_SET:
4995 # caller didn't specify instance names, so ordering is not important
4997 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4999 instance_names = all_info.keys()
5000 instance_names = utils.NiceSort(instance_names)
5002 # caller did specify names, so we must keep the ordering
5004 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5006 tgt_set = all_info.keys()
5007 missing = set(self.wanted).difference(tgt_set)
5009 raise errors.OpExecError("Some instances were removed before"
5010 " retrieving their data: %s" % missing)
5011 instance_names = self.wanted
5013 instance_list = [all_info[iname] for iname in instance_names]
5015 # begin data gathering
5017 nodes = frozenset([inst.primary_node for inst in instance_list])
5018 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5022 if self.do_node_query:
5024 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5026 result = node_data[name]
5028 # offline nodes will be in both lists
5029 off_nodes.append(name)
5031 bad_nodes.append(name)
5034 live_data.update(result.payload)
5035 # else no instance is alive
5037 live_data = dict([(name, {}) for name in instance_names])
5039 # end data gathering
5044 cluster = self.cfg.GetClusterInfo()
5045 for instance in instance_list:
5047 i_hv = cluster.FillHV(instance, skip_globals=True)
5048 i_be = cluster.FillBE(instance)
5049 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5050 for field in self.op.output_fields:
5051 st_match = self._FIELDS_STATIC.Matches(field)
5052 if field in self._SIMPLE_FIELDS:
5053 val = getattr(instance, field)
5054 elif field == "pnode":
5055 val = instance.primary_node
5056 elif field == "snodes":
5057 val = list(instance.secondary_nodes)
5058 elif field == "admin_state":
5059 val = instance.admin_up
5060 elif field == "oper_state":
5061 if instance.primary_node in bad_nodes:
5064 val = bool(live_data.get(instance.name))
5065 elif field == "status":
5066 if instance.primary_node in off_nodes:
5067 val = "ERROR_nodeoffline"
5068 elif instance.primary_node in bad_nodes:
5069 val = "ERROR_nodedown"
5071 running = bool(live_data.get(instance.name))
5073 if instance.admin_up:
5078 if instance.admin_up:
5082 elif field == "oper_ram":
5083 if instance.primary_node in bad_nodes:
5085 elif instance.name in live_data:
5086 val = live_data[instance.name].get("memory", "?")
5089 elif field == "vcpus":
5090 val = i_be[constants.BE_VCPUS]
5091 elif field == "disk_template":
5092 val = instance.disk_template
5095 val = instance.nics[0].ip
5098 elif field == "nic_mode":
5100 val = i_nicp[0][constants.NIC_MODE]
5103 elif field == "nic_link":
5105 val = i_nicp[0][constants.NIC_LINK]
5108 elif field == "bridge":
5109 if (instance.nics and
5110 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5111 val = i_nicp[0][constants.NIC_LINK]
5114 elif field == "mac":
5116 val = instance.nics[0].mac
5119 elif field in ("sda_size", "sdb_size"):
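# legacy field names: the third character ('a' or 'b') selects disk
# index 0 or 1 via the arithmetic below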
5120 idx = ord(field[2]) - ord('a')
5122 val = instance.FindDisk(idx).size
5123 except errors.OpPrereqError:
5125 elif field == "disk_usage": # total disk usage per node
5126 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5127 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5128 elif field == "tags":
5129 val = list(instance.GetTags())
5130 elif field == "hvparams":
5132 elif (field.startswith(HVPREFIX) and
5133 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5134 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5135 val = i_hv.get(field[len(HVPREFIX):], None)
5136 elif field == "beparams":
5138 elif (field.startswith(BEPREFIX) and
5139 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5140 val = i_be.get(field[len(BEPREFIX):], None)
5141 elif st_match and st_match.groups():
5142 # matches a variable list
5143 st_groups = st_match.groups()
5144 if st_groups and st_groups[0] == "disk":
5145 if st_groups[1] == "count":
5146 val = len(instance.disks)
5147 elif st_groups[1] == "sizes":
5148 val = [disk.size for disk in instance.disks]
5149 elif st_groups[1] == "size":
5151 val = instance.FindDisk(st_groups[2]).size
5152 except errors.OpPrereqError:
5155 assert False, "Unhandled disk parameter"
5156 elif st_groups[0] == "nic":
5157 if st_groups[1] == "count":
5158 val = len(instance.nics)
5159 elif st_groups[1] == "macs":
5160 val = [nic.mac for nic in instance.nics]
5161 elif st_groups[1] == "ips":
5162 val = [nic.ip for nic in instance.nics]
5163 elif st_groups[1] == "modes":
5164 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5165 elif st_groups[1] == "links":
5166 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5167 elif st_groups[1] == "bridges":
5170 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5171 val.append(nicp[constants.NIC_LINK])
5176 nic_idx = int(st_groups[2])
5177 if nic_idx >= len(instance.nics):
5180 if st_groups[1] == "mac":
5181 val = instance.nics[nic_idx].mac
5182 elif st_groups[1] == "ip":
5183 val = instance.nics[nic_idx].ip
5184 elif st_groups[1] == "mode":
5185 val = i_nicp[nic_idx][constants.NIC_MODE]
5186 elif st_groups[1] == "link":
5187 val = i_nicp[nic_idx][constants.NIC_LINK]
5188 elif st_groups[1] == "bridge":
5189 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5190 if nic_mode == constants.NIC_MODE_BRIDGED:
5191 val = i_nicp[nic_idx][constants.NIC_LINK]
5195 assert False, "Unhandled NIC parameter"
5197 assert False, ("Declared but unhandled variable parameter '%s'" %
5200 assert False, "Declared but unhandled parameter '%s'" % field
5207 class LUFailoverInstance(LogicalUnit):
5208 """Failover an instance.
5211 HPATH = "instance-failover"
5212 HTYPE = constants.HTYPE_INSTANCE
5214 ("instance_name", _TNonEmptyString),
5215 ("ignore_consistency", _TBool),
5217 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5220 def ExpandNames(self):
5221 self._ExpandAndLockInstance()
5222 self.needed_locks[locking.LEVEL_NODE] = []
5223 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5225 def DeclareLocks(self, level):
5226 if level == locking.LEVEL_NODE:
5227 self._LockInstancesNodes()
5229 def BuildHooksEnv(self):
5232 This runs on master, primary and secondary nodes of the instance.
5235 instance = self.instance
5236 source_node = instance.primary_node
5237 target_node = instance.secondary_nodes[0]
5239 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5240 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5241 "OLD_PRIMARY": source_node,
5242 "OLD_SECONDARY": target_node,
5243 "NEW_PRIMARY": target_node,
5244 "NEW_SECONDARY": source_node,
5246 env.update(_BuildInstanceHookEnvByObject(self, instance))
5247 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5249 nl_post.append(source_node)
5250 return env, nl, nl_post
5252 def CheckPrereq(self):
5253 """Check prerequisites.
5255 This checks that the instance is in the cluster.
5258 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5259 assert self.instance is not None, \
5260 "Cannot retrieve locked instance %s" % self.op.instance_name
5262 bep = self.cfg.GetClusterInfo().FillBE(instance)
5263 if instance.disk_template not in constants.DTS_NET_MIRROR:
5264 raise errors.OpPrereqError("Instance's disk layout is not"
5265 " network mirrored, cannot failover.",
5268 secondary_nodes = instance.secondary_nodes
5269 if not secondary_nodes:
5270 raise errors.ProgrammerError("no secondary node but using "
5271 "a mirrored disk template")
5273 target_node = secondary_nodes[0]
5274 _CheckNodeOnline(self, target_node)
5275 _CheckNodeNotDrained(self, target_node)
5276 if instance.admin_up:
5277 # check memory requirements on the secondary node
5278 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5279 instance.name, bep[constants.BE_MEMORY],
5280 instance.hypervisor)
5282 self.LogInfo("Not checking memory on the secondary node as"
5283 " instance will not be started")
5285 # check bridge existence
5286 _CheckInstanceBridgesExist(self, instance, node=target_node)
5288 def Exec(self, feedback_fn):
5289 """Failover an instance.
5291 The failover is done by shutting it down on its present node and
5292 starting it on the secondary.
5295 instance = self.instance
5297 source_node = instance.primary_node
5298 target_node = instance.secondary_nodes[0]
5300 if instance.admin_up:
5301 feedback_fn("* checking disk consistency between source and target")
5302 for dev in instance.disks:
5303 # for drbd, these are drbd over lvm
5304 if not _CheckDiskConsistency(self, dev, target_node, False):
5305 if not self.op.ignore_consistency:
5306 raise errors.OpExecError("Disk %s is degraded on target node,"
5307 " aborting failover." % dev.iv_name)
5309 feedback_fn("* not checking disk consistency as instance is not running")
5311 feedback_fn("* shutting down instance on source node")
5312 logging.info("Shutting down instance %s on node %s",
5313 instance.name, source_node)
5315 result = self.rpc.call_instance_shutdown(source_node, instance,
5316 self.op.shutdown_timeout)
5317 msg = result.fail_msg
5319 if self.op.ignore_consistency:
5320 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5321 " Proceeding anyway. Please make sure node"
5322 " %s is down. Error details: %s",
5323 instance.name, source_node, source_node, msg)
5325 raise errors.OpExecError("Could not shutdown instance %s on"
5327 (instance.name, source_node, msg))
5329 feedback_fn("* deactivating the instance's disks on source node")
5330 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5331 raise errors.OpExecError("Can't shut down the instance's disks.")
5333 instance.primary_node = target_node
5334 # distribute new instance config to the other nodes
5335 self.cfg.Update(instance, feedback_fn)
5337 # Only start the instance if it's marked as up
5338 if instance.admin_up:
5339 feedback_fn("* activating the instance's disks on target node")
5340 logging.info("Starting instance %s on node %s",
5341 instance.name, target_node)
5343 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5344 ignore_secondaries=True)
5346 _ShutdownInstanceDisks(self, instance)
5347 raise errors.OpExecError("Can't activate the instance's disks")
5349 feedback_fn("* starting the instance on the target node")
5350 result = self.rpc.call_instance_start(target_node, instance, None, None)
5351 msg = result.fail_msg
5353 _ShutdownInstanceDisks(self, instance)
5354 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5355 (instance.name, target_node, msg))
5358 class LUMigrateInstance(LogicalUnit):
5359 """Migrate an instance.
5361 This is live migration, performed without shutting the instance down,
5362 in contrast to failover, which requires a shutdown.
5365 HPATH = "instance-migrate"
5366 HTYPE = constants.HTYPE_INSTANCE
5368 ("instance_name", _TNonEmptyString),
5370 ("cleanup", _TBool),
5375 def ExpandNames(self):
5376 self._ExpandAndLockInstance()
5378 self.needed_locks[locking.LEVEL_NODE] = []
5379 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5381 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5382 self.op.live, self.op.cleanup)
5383 self.tasklets = [self._migrater]
5385 def DeclareLocks(self, level):
5386 if level == locking.LEVEL_NODE:
5387 self._LockInstancesNodes()
5389 def BuildHooksEnv(self):
5392 This runs on master, primary and secondary nodes of the instance.
5395 instance = self._migrater.instance
5396 source_node = instance.primary_node
5397 target_node = instance.secondary_nodes[0]
5398 env = _BuildInstanceHookEnvByObject(self, instance)
5399 env["MIGRATE_LIVE"] = self.op.live
5400 env["MIGRATE_CLEANUP"] = self.op.cleanup
5402 "OLD_PRIMARY": source_node,
5403 "OLD_SECONDARY": target_node,
5404 "NEW_PRIMARY": target_node,
5405 "NEW_SECONDARY": source_node,
5407 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5409 nl_post.append(source_node)
5410 return env, nl, nl_post
5413 class LUMoveInstance(LogicalUnit):
5414 """Move an instance by data-copying.
5417 HPATH = "instance-move"
5418 HTYPE = constants.HTYPE_INSTANCE
5420 ("instance_name", _TNonEmptyString),
5421 ("target_node", _TNonEmptyString),
5423 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5426 def ExpandNames(self):
5427 self._ExpandAndLockInstance()
5428 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5429 self.op.target_node = target_node
5430 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5433 def DeclareLocks(self, level):
5434 if level == locking.LEVEL_NODE:
5435 self._LockInstancesNodes(primary_only=True)
5437 def BuildHooksEnv(self):
5440 This runs on master, primary and secondary nodes of the instance.
5444 "TARGET_NODE": self.op.target_node,
5445 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5447 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5448 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5449 self.op.target_node]
5452 def CheckPrereq(self):
5453 """Check prerequisites.
5455 This checks that the instance is in the cluster.
5458 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5459 assert self.instance is not None, \
5460 "Cannot retrieve locked instance %s" % self.op.instance_name
5462 node = self.cfg.GetNodeInfo(self.op.target_node)
5463 assert node is not None, \
5464 "Cannot retrieve locked node %s" % self.op.target_node
5466 self.target_node = target_node = node.name
5468 if target_node == instance.primary_node:
5469 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5470 (instance.name, target_node),
5473 bep = self.cfg.GetClusterInfo().FillBE(instance)
5475 for idx, dsk in enumerate(instance.disks):
5476 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5477 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5478 " cannot copy" % idx, errors.ECODE_STATE)
5480 _CheckNodeOnline(self, target_node)
5481 _CheckNodeNotDrained(self, target_node)
5483 if instance.admin_up:
5484 # check memory requirements on the secondary node
5485 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5486 instance.name, bep[constants.BE_MEMORY],
5487 instance.hypervisor)
5489 self.LogInfo("Not checking memory on the secondary node as"
5490 " instance will not be started")
5492 # check bridge existence
5493 _CheckInstanceBridgesExist(self, instance, node=target_node)
5495 def Exec(self, feedback_fn):
5496 """Move an instance.
5498 The move is done by shutting it down on its present node, copying
5499 the data over (slow) and starting it on the new node.
5502 instance = self.instance
5504 source_node = instance.primary_node
5505 target_node = self.target_node
5507 self.LogInfo("Shutting down instance %s on source node %s",
5508 instance.name, source_node)
5510 result = self.rpc.call_instance_shutdown(source_node, instance,
5511 self.op.shutdown_timeout)
5512 msg = result.fail_msg
5514 if self.op.ignore_consistency:
5515 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5516 " Proceeding anyway. Please make sure node"
5517 " %s is down. Error details: %s",
5518 instance.name, source_node, source_node, msg)
5520 raise errors.OpExecError("Could not shutdown instance %s on"
5522 (instance.name, source_node, msg))
5524 # create the target disks
5526 _CreateDisks(self, instance, target_node=target_node)
5527 except errors.OpExecError:
5528 self.LogWarning("Device creation failed, reverting...")
5530 _RemoveDisks(self, instance, target_node=target_node)
5532 self.cfg.ReleaseDRBDMinors(instance.name)
5535 cluster_name = self.cfg.GetClusterInfo().cluster_name
5538 # activate, get path, copy the data over
5539 for idx, disk in enumerate(instance.disks):
5540 self.LogInfo("Copying data for disk %d", idx)
5541 result = self.rpc.call_blockdev_assemble(target_node, disk,
5542 instance.name, True)
5544 self.LogWarning("Can't assemble newly created disk %d: %s",
5545 idx, result.fail_msg)
5546 errs.append(result.fail_msg)
5548 dev_path = result.payload
5549 result = self.rpc.call_blockdev_export(source_node, disk,
5550 target_node, dev_path,
5553 self.LogWarning("Can't copy data over for disk %d: %s",
5554 idx, result.fail_msg)
5555 errs.append(result.fail_msg)
5559 self.LogWarning("Some disks failed to copy, aborting")
5561 _RemoveDisks(self, instance, target_node=target_node)
5563 self.cfg.ReleaseDRBDMinors(instance.name)
5564 raise errors.OpExecError("Errors during disk copy: %s" %
5567 instance.primary_node = target_node
5568 self.cfg.Update(instance, feedback_fn)
5570 self.LogInfo("Removing the disks on the original node")
5571 _RemoveDisks(self, instance, target_node=source_node)
5573 # Only start the instance if it's marked as up
5574 if instance.admin_up:
5575 self.LogInfo("Starting instance %s on node %s",
5576 instance.name, target_node)
5578 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5579 ignore_secondaries=True)
5581 _ShutdownInstanceDisks(self, instance)
5582 raise errors.OpExecError("Can't activate the instance's disks")
5584 result = self.rpc.call_instance_start(target_node, instance, None, None)
5585 msg = result.fail_msg
5587 _ShutdownInstanceDisks(self, instance)
5588 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5589 (instance.name, target_node, msg))
5592 class LUMigrateNode(LogicalUnit):
5593 """Migrate all instances from a node.
5596 HPATH = "node-migrate"
5597 HTYPE = constants.HTYPE_NODE
5599 ("node_name", _TNonEmptyString),
5604 def ExpandNames(self):
5605 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5607 self.needed_locks = {
5608 locking.LEVEL_NODE: [self.op.node_name],
5611 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5613 # Create tasklets for migrating instances for all instances on this node
5617 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5618 logging.debug("Migrating instance %s", inst.name)
5619 names.append(inst.name)
5621 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5623 self.tasklets = tasklets
5625 # Declare instance locks
5626 self.needed_locks[locking.LEVEL_INSTANCE] = names
5628 def DeclareLocks(self, level):
5629 if level == locking.LEVEL_NODE:
5630 self._LockInstancesNodes()
5632 def BuildHooksEnv(self):
5635 This runs on the master, the primary and all the secondaries.
5639 "NODE_NAME": self.op.node_name,
5642 nl = [self.cfg.GetMasterNode()]
5644 return (env, nl, nl)
5647 class TLMigrateInstance(Tasklet):
5648 def __init__(self, lu, instance_name, live, cleanup):
5649 """Initializes this class.
5652 Tasklet.__init__(self, lu)
5655 self.instance_name = instance_name
5657 self.cleanup = cleanup
5659 def CheckPrereq(self):
5660 """Check prerequisites.
5662 This checks that the instance is in the cluster.
5665 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5666 instance = self.cfg.GetInstanceInfo(instance_name)
5667 assert instance is not None
5669 if instance.disk_template != constants.DT_DRBD8:
5670 raise errors.OpPrereqError("Instance's disk layout is not"
5671 " drbd8, cannot migrate.", errors.ECODE_STATE)
5673 secondary_nodes = instance.secondary_nodes
5674 if not secondary_nodes:
5675 raise errors.ConfigurationError("No secondary node but using"
5676 " drbd8 disk template")
5678 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5680 target_node = secondary_nodes[0]
5681 # check memory requirements on the secondary node
5682 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5683 instance.name, i_be[constants.BE_MEMORY],
5684 instance.hypervisor)
5686 # check bridge existence
5687 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5689 if not self.cleanup:
5690 _CheckNodeNotDrained(self.lu, target_node)
5691 result = self.rpc.call_instance_migratable(instance.primary_node,
5693 result.Raise("Can't migrate, please use failover",
5694 prereq=True, ecode=errors.ECODE_STATE)
5696 self.instance = instance
5698 def _WaitUntilSync(self):
5699 """Poll with custom rpc for disk sync.
5701 This uses our own step-based rpc call.
5704 self.feedback_fn("* wait until resync is done")
5708 result = self.rpc.call_drbd_wait_sync(self.all_nodes, self.nodes_ip,
5710 self.instance.disks)
5712 for node, nres in result.items():
5713 nres.Raise("Cannot resync disks on node %s" % node)
5714 node_done, node_percent = nres.payload
5715 all_done = all_done and node_done
5716 if node_percent is not None:
5717 min_percent = min(min_percent, node_percent)
5719 if min_percent < 100:
5720 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5723 def _EnsureSecondary(self, node):
5724 """Demote a node to secondary.
5727 self.feedback_fn("* switching node %s to secondary mode" % node)
5729 for dev in self.instance.disks:
5730 self.cfg.SetDiskID(dev, node)
5732 result = self.rpc.call_blockdev_close(node, self.instance.name,
5733 self.instance.disks)
5734 result.Raise("Cannot change disk to secondary on node %s" % node)
5736 def _GoStandalone(self):
5737 """Disconnect from the network.
5740 self.feedback_fn("* changing into standalone mode")
5741 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5742 self.instance.disks)
5743 for node, nres in result.items():
5744 nres.Raise("Cannot disconnect disks node %s" % node)
5746 def _GoReconnect(self, multimaster):
5747 """Reconnect to the network.
5753 msg = "single-master"
5754 self.feedback_fn("* changing disks into %s mode" % msg)
5755 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5756 self.instance.disks,
5757 self.instance.name, multimaster)
5758 for node, nres in result.items():
5759 nres.Raise("Cannot change disks config on node %s" % node)
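# Simplified sketch of how the migration code below combines these helpers:
# the disks are reconnected with multimaster=True (dual-primary) for the
# duration of the live migration, then switched back to single-master:
#
#   self._GoStandalone()
#   self._GoReconnect(True)   # dual-primary while the instance moves
#   # ... migrate ...
#   self._GoReconnect(False)  # back to single-master
#   self._WaitUntilSync()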
5761 def _ExecCleanup(self):
5762 """Try to cleanup after a failed migration.
5764 The cleanup is done by:
5765 - check that the instance is running only on one node
5766 (and update the config if needed)
5767 - change disks on its secondary node to secondary
5768 - wait until disks are fully synchronized
5769 - disconnect from the network
5770 - change disks into single-master mode
5771 - wait again until disks are fully synchronized
5774 instance = self.instance
5775 target_node = self.target_node
5776 source_node = self.source_node
5778 # check running on only one node
5779 self.feedback_fn("* checking where the instance actually runs"
5780 " (if this hangs, the hypervisor might be in"
5782 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5783 for node, result in ins_l.items():
5784 result.Raise("Can't contact node %s" % node)
5786 runningon_source = instance.name in ins_l[source_node].payload
5787 runningon_target = instance.name in ins_l[target_node].payload
5789 if runningon_source and runningon_target:
5790 raise errors.OpExecError("Instance seems to be running on two nodes,"
5791 " or the hypervisor is confused. You will have"
5792 " to ensure manually that it runs only on one"
5793 " and restart this operation.")
5795 if not (runningon_source or runningon_target):
5796 raise errors.OpExecError("Instance does not seem to be running at all."
5797 " In this case, it's safer to repair by"
5798 " running 'gnt-instance stop' to ensure disk"
5799 " shutdown, and then restarting it.")
5801 if runningon_target:
5802 # the migration has actually succeeded, we need to update the config
5803 self.feedback_fn("* instance running on secondary node (%s),"
5804 " updating config" % target_node)
5805 instance.primary_node = target_node
5806 self.cfg.Update(instance, self.feedback_fn)
5807 demoted_node = source_node
5809 self.feedback_fn("* instance confirmed to be running on its"
5810 " primary node (%s)" % source_node)
5811 demoted_node = target_node
5813 self._EnsureSecondary(demoted_node)
5815 self._WaitUntilSync()
5816 except errors.OpExecError:
5817 # we ignore errors here, since if the device is standalone, it
5818 # won't be able to sync
5820 self._GoStandalone()
5821 self._GoReconnect(False)
5822 self._WaitUntilSync()
5824 self.feedback_fn("* done")
5826 def _RevertDiskStatus(self):
5827 """Try to revert the disk status after a failed migration.
5830 target_node = self.target_node
5832 self._EnsureSecondary(target_node)
5833 self._GoStandalone()
5834 self._GoReconnect(False)
5835 self._WaitUntilSync()
5836 except errors.OpExecError, err:
5837 self.lu.LogWarning("Migration failed and I can't reconnect the"
5838 " drives: error '%s'\n"
5839 "Please look and recover the instance status" %
5842 def _AbortMigration(self):
5843 """Call the hypervisor code to abort a started migration.
5846 instance = self.instance
5847 target_node = self.target_node
5848 migration_info = self.migration_info
5850 abort_result = self.rpc.call_finalize_migration(target_node,
5854 abort_msg = abort_result.fail_msg
5856 logging.error("Aborting migration failed on target node %s: %s",
5857 target_node, abort_msg)
5858 # Don't raise an exception here, as we still have to try to revert the
5859 # disk status, even if this step failed.
5861 def _ExecMigration(self):
5862 """Migrate an instance.
5864 The migration is done by:
5865 - change the disks into dual-master mode
5866 - wait until disks are fully synchronized again
5867 - migrate the instance
5868 - change disks on the new secondary node (the old primary) to secondary
5869 - wait until disks are fully synchronized
5870 - change disks into single-master mode
5872 """
5873 instance = self.instance
5874 target_node = self.target_node
5875 source_node = self.source_node
5877 self.feedback_fn("* checking disk consistency between source and target")
5878 for dev in instance.disks:
5879 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5880 raise errors.OpExecError("Disk %s is degraded or not fully"
5881 " synchronized on target node,"
5882 " aborting migrate." % dev.iv_name)
5884 # First get the migration information from the remote node
5885 result = self.rpc.call_migration_info(source_node, instance)
5886 msg = result.fail_msg
5887 if msg:
5888 log_err = ("Failed fetching source migration information from %s: %s" %
5889 (source_node, msg))
5890 logging.error(log_err)
5891 raise errors.OpExecError(log_err)
5893 self.migration_info = migration_info = result.payload
5895 # Then switch the disks to master/master mode
5896 self._EnsureSecondary(target_node)
5897 self._GoStandalone()
5898 self._GoReconnect(True)
5899 self._WaitUntilSync()
5901 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5902 result = self.rpc.call_accept_instance(target_node,
5903 instance,
5904 migration_info,
5905 self.nodes_ip[target_node])
5907 msg = result.fail_msg
5908 if msg:
5909 logging.error("Instance pre-migration failed, trying to revert"
5910 " disk status: %s", msg)
5911 self.feedback_fn("Pre-migration failed, aborting")
5912 self._AbortMigration()
5913 self._RevertDiskStatus()
5914 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5915 (instance.name, msg))
5917 self.feedback_fn("* migrating instance to %s" % target_node)
5919 result = self.rpc.call_instance_migrate(source_node, instance,
5920 self.nodes_ip[target_node],
5921 self.live)
5922 msg = result.fail_msg
5923 if msg:
5924 logging.error("Instance migration failed, trying to revert"
5925 " disk status: %s", msg)
5926 self.feedback_fn("Migration failed, aborting")
5927 self._AbortMigration()
5928 self._RevertDiskStatus()
5929 raise errors.OpExecError("Could not migrate instance %s: %s" %
5930 (instance.name, msg))
5933 instance.primary_node = target_node
5934 # distribute new instance config to the other nodes
5935 self.cfg.Update(instance, self.feedback_fn)
5937 result = self.rpc.call_finalize_migration(target_node,
5938 instance,
5939 migration_info,
5940 True)
5941 msg = result.fail_msg
5942 if msg:
5943 logging.error("Instance migration succeeded, but finalization failed:"
5945 raise errors.OpExecError("Could not finalize instance migration: %s" %
5948 self._EnsureSecondary(source_node)
5949 self._WaitUntilSync()
5950 self._GoStandalone()
5951 self._GoReconnect(False)
5952 self._WaitUntilSync()
5954 self.feedback_fn("* done")
5956 def Exec(self, feedback_fn):
5957 """Perform the migration.
5960 feedback_fn("Migrating instance %s" % self.instance.name)
5962 self.feedback_fn = feedback_fn
5964 self.source_node = self.instance.primary_node
5965 self.target_node = self.instance.secondary_nodes[0]
5966 self.all_nodes = [self.source_node, self.target_node]
5967 self.nodes_ip = {
5968 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5969 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5970 }
5972 if self.cleanup:
5973 return self._ExecCleanup()
5974 else:
5975 return self._ExecMigration()
5978 def _CreateBlockDev(lu, node, instance, device, force_create,
5979 info, force_open):
5980 """Create a tree of block devices on a given node.
5982 If this device type has to be created on secondaries, create it and
5983 all of its children.
5985 If not, just recurse to children keeping the same 'force' value.
5987 @param lu: the lu on whose behalf we execute
5988 @param node: the node on which to create the device
5989 @type instance: L{objects.Instance}
5990 @param instance: the instance which owns the device
5991 @type device: L{objects.Disk}
5992 @param device: the device to create
5993 @type force_create: boolean
5994 @param force_create: whether to force creation of this device; this
5995 will be changed to True whenever we find a device which has
5996 CreateOnSecondary() attribute
5997 @param info: the extra 'metadata' we should attach to the device
5998 (this will be represented as a LVM tag)
5999 @type force_open: boolean
6000 @param force_open: this parameter will be passed to the
6001 L{backend.BlockdevCreate} function where it specifies
6002 whether we run on primary or not, and it affects both
6003 the child assembly and the device's own Open() execution
6005 """
6006 if device.CreateOnSecondary():
6007 force_create = True
6010 for child in device.children:
6011 _CreateBlockDev(lu, node, instance, child, force_create,
6012 info, force_open)
6014 if not force_create:
6015 return
6017 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
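# Illustrative sketch (not part of the original code): how the force_create
# flag propagates in _CreateBlockDev above, modelled on a toy device tree so
# the recursion can be followed without a cluster.
#
#   class _ToyDev(object):
#       def __init__(self, on_secondary, children=()):
#           self.children = list(children)
#           self._on_secondary = on_secondary
#       def CreateOnSecondary(self):
#           return self._on_secondary
#
#   def _to_create(dev, force=False, acc=None):
#       if acc is None:
#           acc = []
#       force = force or dev.CreateOnSecondary()  # sticky once set
#       for child in dev.children:
#           _to_create(child, force, acc)         # children created first
#       if force:
#           acc.append(dev)
#       return acc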
6020 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6021 """Create a single block device on a given node.
6023 This will not recurse over children of the device, so they must be
6024 created in advance.
6026 @param lu: the lu on whose behalf we execute
6027 @param node: the node on which to create the device
6028 @type instance: L{objects.Instance}
6029 @param instance: the instance which owns the device
6030 @type device: L{objects.Disk}
6031 @param device: the device to create
6032 @param info: the extra 'metadata' we should attach to the device
6033 (this will be represented as a LVM tag)
6034 @type force_open: boolean
6035 @param force_open: this parameter will be passed to the
6036 L{backend.BlockdevCreate} function where it specifies
6037 whether we run on primary or not, and it affects both
6038 the child assembly and the device's own Open() execution
6040 """
6041 lu.cfg.SetDiskID(device, node)
6042 result = lu.rpc.call_blockdev_create(node, device, device.size,
6043 instance.name, force_open, info)
6044 result.Raise("Can't create block device %s on"
6045 " node %s for instance %s" % (device, node, instance.name))
6046 if device.physical_id is None:
6047 device.physical_id = result.payload
6050 def _GenerateUniqueNames(lu, exts):
6051 """Generate a suitable LV name.
6053 This will generate a logical volume name for the given instance.
6055 """
6056 results = []
6057 for val in exts:
6058 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6059 results.append("%s%s" % (new_id, val))
6061 return results
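# Example (illustrative; the generated IDs are invented): for
# exts=[".disk0_data", ".disk0_meta"] the helper above returns two names of
# the form "<unique-id>.disk0_data" and "<unique-id>.disk0_meta", one freshly
# generated ID per requested suffix.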
6063 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6064 p_minor, s_minor):
6065 """Generate a drbd8 device complete with its children.
6068 port = lu.cfg.AllocatePort()
6069 vgname = lu.cfg.GetVGName()
6070 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6071 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6072 logical_id=(vgname, names[0]))
6073 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6074 logical_id=(vgname, names[1]))
6075 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6076 logical_id=(primary, secondary, port,
6077 p_minor, s_minor,
6078 shared_secret),
6079 children=[dev_data, dev_meta],
6080 iv_name=iv_name)
6081 return drbd_dev
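# Illustrative sketch (not part of the original code): the device tree built
# above for one disk, shown schematically.
#
#   DRBD8  logical_id=(primary, secondary, port, p_minor, s_minor, secret)
#     +- LV data  (size MB,  logical_id=(vgname, names[0]))
#     +- LV meta  (128 MB,   logical_id=(vgname, names[1]))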
6084 def _GenerateDiskTemplate(lu, template_name,
6085 instance_name, primary_node,
6086 secondary_nodes, disk_info,
6087 file_storage_dir, file_driver,
6088 base_index):
6089 """Generate the entire disk layout for a given template type.
6091 """
6092 #TODO: compute space requirements
6094 vgname = lu.cfg.GetVGName()
6095 disk_count = len(disk_info)
6096 disks = []
6097 if template_name == constants.DT_DISKLESS:
6098 pass
6099 elif template_name == constants.DT_PLAIN:
6100 if len(secondary_nodes) != 0:
6101 raise errors.ProgrammerError("Wrong template configuration")
6103 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6104 for i in range(disk_count)])
6105 for idx, disk in enumerate(disk_info):
6106 disk_index = idx + base_index
6107 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6108 logical_id=(vgname, names[idx]),
6109 iv_name="disk/%d" % disk_index,
6111 disks.append(disk_dev)
6112 elif template_name == constants.DT_DRBD8:
6113 if len(secondary_nodes) != 1:
6114 raise errors.ProgrammerError("Wrong template configuration")
6115 remote_node = secondary_nodes[0]
6116 minors = lu.cfg.AllocateDRBDMinor(
6117 [primary_node, remote_node] * len(disk_info), instance_name)
6119 names = []
6120 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6121 for i in range(disk_count)]):
6122 names.append(lv_prefix + "_data")
6123 names.append(lv_prefix + "_meta")
6124 for idx, disk in enumerate(disk_info):
6125 disk_index = idx + base_index
6126 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6127 disk["size"], names[idx*2:idx*2+2],
6128 "disk/%d" % disk_index,
6129 minors[idx*2], minors[idx*2+1])
6130 disk_dev.mode = disk["mode"]
6131 disks.append(disk_dev)
6132 elif template_name == constants.DT_FILE:
6133 if len(secondary_nodes) != 0:
6134 raise errors.ProgrammerError("Wrong template configuration")
6136 _RequireFileStorage()
6138 for idx, disk in enumerate(disk_info):
6139 disk_index = idx + base_index
6140 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6141 iv_name="disk/%d" % disk_index,
6142 logical_id=(file_driver,
6143 "%s/disk%d" % (file_storage_dir,
6146 disks.append(disk_dev)
6148 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
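# Usage sketch (illustrative; node and instance names are made up): a single
# 1 GB plain-LVM disk starting at base_index 0.
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1", "node1",
#                                 [], [{"size": 1024, "mode": "rw"}],
#                                 None, None, 0)
#   disks[0].iv_name   # -> "disk/0"
#   disks[0].size      # -> 1024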
6152 def _GetInstanceInfoText(instance):
6153 """Compute that text that should be added to the disk's metadata.
6156 return "originstname+%s" % instance.name
6159 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6160 """Create all disks for an instance.
6162 This abstracts away some work from AddInstance.
6164 @type lu: L{LogicalUnit}
6165 @param lu: the logical unit on whose behalf we execute
6166 @type instance: L{objects.Instance}
6167 @param instance: the instance whose disks we should create
6168 @type to_skip: list
6169 @param to_skip: list of indices to skip
6170 @type target_node: string
6171 @param target_node: if passed, overrides the target node for creation
6173 @return: the success of the creation
6175 """
6176 info = _GetInstanceInfoText(instance)
6177 if target_node is None:
6178 pnode = instance.primary_node
6179 all_nodes = instance.all_nodes
6180 else:
6181 pnode = target_node
6182 all_nodes = [pnode]
6184 if instance.disk_template == constants.DT_FILE:
6185 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6186 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6188 result.Raise("Failed to create directory '%s' on"
6189 " node %s" % (file_storage_dir, pnode))
6191 # Note: this needs to be kept in sync with adding of disks in
6192 # LUSetInstanceParams
6193 for idx, device in enumerate(instance.disks):
6194 if to_skip and idx in to_skip:
6195 continue
6196 logging.info("Creating volume %s for instance %s",
6197 device.iv_name, instance.name)
6199 for node in all_nodes:
6200 f_create = node == pnode
6201 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6204 def _RemoveDisks(lu, instance, target_node=None):
6205 """Remove all disks for an instance.
6207 This abstracts away some work from `AddInstance()` and
6208 `RemoveInstance()`. Note that in case some of the devices couldn't
6209 be removed, the removal will continue with the other ones (compare
6210 with `_CreateDisks()`).
6212 @type lu: L{LogicalUnit}
6213 @param lu: the logical unit on whose behalf we execute
6214 @type instance: L{objects.Instance}
6215 @param instance: the instance whose disks we should remove
6216 @type target_node: string
6217 @param target_node: used to override the node on which to remove the disks
6219 @return: the success of the removal
6221 """
6222 logging.info("Removing block devices for instance %s", instance.name)
6225 for device in instance.disks:
6226 if target_node:
6227 edata = [(target_node, device)]
6228 else:
6229 edata = device.ComputeNodeTree(instance.primary_node)
6230 for node, disk in edata:
6231 lu.cfg.SetDiskID(disk, node)
6232 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6233 if msg:
6234 lu.LogWarning("Could not remove block device %s on node %s,"
6235 " continuing anyway: %s", device.iv_name, node, msg)
6238 if instance.disk_template == constants.DT_FILE:
6239 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6240 if target_node:
6241 tgt = target_node
6242 else:
6243 tgt = instance.primary_node
6244 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6245 if result.fail_msg:
6246 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6247 file_storage_dir, instance.primary_node, result.fail_msg)
6248 all_result = False
6250 return all_result
6253 def _ComputeDiskSize(disk_template, disks):
6254 """Compute disk size requirements in the volume group
6257 # Required free disk space as a function of disk and swap space
6258 req_size_dict = {
6259 constants.DT_DISKLESS: None,
6260 constants.DT_PLAIN: sum(d["size"] for d in disks),
6261 # 128 MB are added for drbd metadata for each disk
6262 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6263 constants.DT_FILE: None,
6264 }
6266 if disk_template not in req_size_dict:
6267 raise errors.ProgrammerError("Disk template '%s' size requirement"
6268 " is unknown" % disk_template)
6270 return req_size_dict[disk_template]
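# Worked example (illustrative): two disks of 1024 and 512 MB.
#
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 1024}, {"size": 512}])
#   # -> 1536
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 512}])
#   # -> 1792, as each disk pays 128 MB of DRBD metadata
#   _ComputeDiskSize(constants.DT_DISKLESS, [])
#   # -> None, meaning no volume group space is needed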
6273 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6274 """Hypervisor parameter validation.
6276 This function abstracts the hypervisor parameter validation to be
6277 used in both instance create and instance modify.
6279 @type lu: L{LogicalUnit}
6280 @param lu: the logical unit for which we check
6281 @type nodenames: list
6282 @param nodenames: the list of nodes on which we should check
6283 @type hvname: string
6284 @param hvname: the name of the hypervisor we should use
6285 @type hvparams: dict
6286 @param hvparams: the parameters which we need to check
6287 @raise errors.OpPrereqError: if the parameters are not valid
6289 """
6290 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6291 hvname,
6292 hvparams)
6293 for node in nodenames:
6294 info = hvinfo[node]
6295 if info.offline:
6296 continue
6297 info.Raise("Hypervisor parameter validation failed on node %s" % node)
6300 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6301 """OS parameters validation.
6303 @type lu: L{LogicalUnit}
6304 @param lu: the logical unit for which we check
6305 @type required: boolean
6306 @param required: whether the validation should fail if the OS is not
6307 found
6308 @type nodenames: list
6309 @param nodenames: the list of nodes on which we should check
6310 @type osname: string
6311 @param osname: the name of the OS we should use
6312 @type osparams: dict
6313 @param osparams: the parameters which we need to check
6314 @raise errors.OpPrereqError: if the parameters are not valid
6316 """
6317 result = lu.rpc.call_os_validate(required, nodenames, osname,
6318 [constants.OS_VALIDATE_PARAMETERS],
6319 osparams)
6320 for node, nres in result.items():
6321 # we don't check for offline cases since this should be run only
6322 # against the master node and/or an instance's nodes
6323 nres.Raise("OS Parameters validation failed on node %s" % node)
6324 if not nres.payload:
6325 lu.LogInfo("OS %s not found on node %s, validation skipped",
6329 class LUCreateInstance(LogicalUnit):
6330 """Create an instance.
6333 HPATH = "instance-add"
6334 HTYPE = constants.HTYPE_INSTANCE
6336 ("instance_name", _TNonEmptyString),
6337 ("mode", _TElemOf(constants.INSTANCE_CREATE_MODES)),
6339 ("wait_for_sync", _TBool),
6340 ("ip_check", _TBool),
6341 ("disks", _TListOf(_TDict)),
6342 ("nics", _TListOf(_TDict)),
6343 ("hvparams", _TDict),
6344 ("beparams", _TDict),
6345 ("osparams", _TDict),
6348 ("name_check", True),
6349 ("no_install", False),
6351 ("force_variant", False),
6352 ("source_handshake", None),
6353 ("source_x509_ca", None),
6354 ("source_instance_name", None),
6359 ("iallocator", None),
6360 ("hypervisor", None),
6361 ("disk_template", None),
6362 ("identify_defaults", None),
6366 def CheckArguments(self):
6367 """Check arguments.
6369 """
6370 # do not require name_check to ease forward/backward compatibility
6372 if self.op.no_install and self.op.start:
6373 self.LogInfo("No-installation mode selected, disabling startup")
6374 self.op.start = False
6375 # validate/normalize the instance name
6376 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6377 if self.op.ip_check and not self.op.name_check:
6378 # TODO: make the ip check more flexible and not depend on the name check
6379 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6382 # check nics' parameter names
6383 for nic in self.op.nics:
6384 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6386 # check disks. parameter names and consistent adopt/no-adopt strategy
6387 has_adopt = has_no_adopt = False
6388 for disk in self.op.disks:
6389 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6390 if "adopt" in disk:
6391 has_adopt = True
6392 else:
6393 has_no_adopt = True
6394 if has_adopt and has_no_adopt:
6395 raise errors.OpPrereqError("Either all disks are adopted or none is",
6398 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6399 raise errors.OpPrereqError("Disk adoption is not supported for the"
6400 " '%s' disk template" %
6401 self.op.disk_template,
6402 errors.ECODE_INVAL)
6403 if self.op.iallocator is not None:
6404 raise errors.OpPrereqError("Disk adoption not allowed with an"
6405 " iallocator script", errors.ECODE_INVAL)
6406 if self.op.mode == constants.INSTANCE_IMPORT:
6407 raise errors.OpPrereqError("Disk adoption not allowed for"
6408 " instance import", errors.ECODE_INVAL)
6410 self.adopt_disks = has_adopt
6412 # instance name verification
6413 if self.op.name_check:
6414 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6415 self.op.instance_name = self.hostname1.name
6416 # used in CheckPrereq for ip ping check
6417 self.check_ip = self.hostname1.ip
6418 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6419 raise errors.OpPrereqError("Remote imports require names to be checked" %
6422 self.check_ip = None
6424 # file storage checks
6425 if (self.op.file_driver and
6426 not self.op.file_driver in constants.FILE_DRIVER):
6427 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6428 self.op.file_driver, errors.ECODE_INVAL)
6430 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6431 raise errors.OpPrereqError("File storage directory path not absolute",
6434 ### Node/iallocator related checks
6435 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6436 raise errors.OpPrereqError("One and only one of iallocator and primary"
6437 " node must be given",
6440 self._cds = _GetClusterDomainSecret()
6442 if self.op.mode == constants.INSTANCE_IMPORT:
6443 # On import force_variant must be True, because if we forced it at
6444 # initial install, our only chance when importing it back is that it
6445 # works again
6446 self.op.force_variant = True
6448 if self.op.no_install:
6449 self.LogInfo("No-installation mode has no effect during import")
6451 elif self.op.mode == constants.INSTANCE_CREATE:
6452 if self.op.os_type is None:
6453 raise errors.OpPrereqError("No guest OS specified",
6455 if self.op.disk_template is None:
6456 raise errors.OpPrereqError("No disk template specified",
6459 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6460 # Check handshake to ensure both clusters have the same domain secret
6461 src_handshake = self.op.source_handshake
6462 if not src_handshake:
6463 raise errors.OpPrereqError("Missing source handshake",
6466 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6467 src_handshake)
6468 if errmsg:
6469 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6472 # Load and check source CA
6473 self.source_x509_ca_pem = self.op.source_x509_ca
6474 if not self.source_x509_ca_pem:
6475 raise errors.OpPrereqError("Missing source X509 CA",
6478 try:
6479 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6480 self._cds)
6481 except OpenSSL.crypto.Error, err:
6482 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6483 (err, ), errors.ECODE_INVAL)
6485 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6486 if errcode is not None:
6487 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6490 self.source_x509_ca = cert
6492 src_instance_name = self.op.source_instance_name
6493 if not src_instance_name:
6494 raise errors.OpPrereqError("Missing source instance name",
6497 self.source_instance_name = \
6498 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6501 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6502 self.op.mode, errors.ECODE_INVAL)
6504 def ExpandNames(self):
6505 """ExpandNames for CreateInstance.
6507 Figure out the right locks for instance creation.
6509 """
6510 self.needed_locks = {}
6512 instance_name = self.op.instance_name
6513 # this is just a preventive check, but someone might still add this
6514 # instance in the meantime, and creation will fail at lock-add time
6515 if instance_name in self.cfg.GetInstanceList():
6516 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6517 instance_name, errors.ECODE_EXISTS)
6519 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6521 if self.op.iallocator:
6522 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6523 else:
6524 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6525 nodelist = [self.op.pnode]
6526 if self.op.snode is not None:
6527 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6528 nodelist.append(self.op.snode)
6529 self.needed_locks[locking.LEVEL_NODE] = nodelist
6531 # in case of import lock the source node too
6532 if self.op.mode == constants.INSTANCE_IMPORT:
6533 src_node = self.op.src_node
6534 src_path = self.op.src_path
6536 if src_path is None:
6537 self.op.src_path = src_path = self.op.instance_name
6539 if src_node is None:
6540 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6541 self.op.src_node = None
6542 if os.path.isabs(src_path):
6543 raise errors.OpPrereqError("Importing an instance from an absolute"
6544 " path requires a source node option.",
6547 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6548 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6549 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6550 if not os.path.isabs(src_path):
6551 self.op.src_path = src_path = \
6552 utils.PathJoin(constants.EXPORT_DIR, src_path)
6554 def _RunAllocator(self):
6555 """Run the allocator based on input opcode.
6558 nics = [n.ToDict() for n in self.nics]
6559 ial = IAllocator(self.cfg, self.rpc,
6560 mode=constants.IALLOCATOR_MODE_ALLOC,
6561 name=self.op.instance_name,
6562 disk_template=self.op.disk_template,
6563 tags=[],
6564 os=self.op.os_type,
6565 vcpus=self.be_full[constants.BE_VCPUS],
6566 mem_size=self.be_full[constants.BE_MEMORY],
6567 disks=self.disks,
6568 nics=nics,
6569 hypervisor=self.op.hypervisor,
6570 )
6572 ial.Run(self.op.iallocator)
6575 raise errors.OpPrereqError("Can't compute nodes using"
6576 " iallocator '%s': %s" %
6577 (self.op.iallocator, ial.info),
6578 errors.ECODE_NORES)
6579 if len(ial.result) != ial.required_nodes:
6580 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6581 " of nodes (%s), required %s" %
6582 (self.op.iallocator, len(ial.result),
6583 ial.required_nodes), errors.ECODE_FAULT)
6584 self.op.pnode = ial.result[0]
6585 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6586 self.op.instance_name, self.op.iallocator,
6587 utils.CommaJoin(ial.result))
6588 if ial.required_nodes == 2:
6589 self.op.snode = ial.result[1]
6591 def BuildHooksEnv(self):
6592 """Build hooks env.
6594 This runs on master, primary and secondary nodes of the instance.
6596 """
6597 env = {
6598 "ADD_MODE": self.op.mode,
6599 }
6600 if self.op.mode == constants.INSTANCE_IMPORT:
6601 env["SRC_NODE"] = self.op.src_node
6602 env["SRC_PATH"] = self.op.src_path
6603 env["SRC_IMAGES"] = self.src_images
6605 env.update(_BuildInstanceHookEnv(
6606 name=self.op.instance_name,
6607 primary_node=self.op.pnode,
6608 secondary_nodes=self.secondaries,
6609 status=self.op.start,
6610 os_type=self.op.os_type,
6611 memory=self.be_full[constants.BE_MEMORY],
6612 vcpus=self.be_full[constants.BE_VCPUS],
6613 nics=_NICListToTuple(self, self.nics),
6614 disk_template=self.op.disk_template,
6615 disks=[(d["size"], d["mode"]) for d in self.disks],
6618 hypervisor_name=self.op.hypervisor,
6621 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6622 self.secondaries)
6623 return env, nl, nl
6625 def _ReadExportInfo(self):
6626 """Reads the export information from disk.
6628 It will override the opcode source node and path with the actual
6629 information, if these two were not specified before.
6631 @return: the export information
6633 """
6634 assert self.op.mode == constants.INSTANCE_IMPORT
6636 src_node = self.op.src_node
6637 src_path = self.op.src_path
6639 if src_node is None:
6640 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6641 exp_list = self.rpc.call_export_list(locked_nodes)
6642 found = False
6643 for node in exp_list:
6644 if exp_list[node].fail_msg:
6645 continue
6646 if src_path in exp_list[node].payload:
6647 found = True
6648 self.op.src_node = src_node = node
6649 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6650 src_path)
6651 break
6652 if not found:
6653 raise errors.OpPrereqError("No export found for relative path %s" %
6654 src_path, errors.ECODE_INVAL)
6656 _CheckNodeOnline(self, src_node)
6657 result = self.rpc.call_export_info(src_node, src_path)
6658 result.Raise("No export or invalid export found in dir %s" % src_path)
6660 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6661 if not export_info.has_section(constants.INISECT_EXP):
6662 raise errors.ProgrammerError("Corrupted export config",
6663 errors.ECODE_ENVIRON)
6665 ei_version = export_info.get(constants.INISECT_EXP, "version")
6666 if (int(ei_version) != constants.EXPORT_VERSION):
6667 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6668 (ei_version, constants.EXPORT_VERSION),
6669 errors.ECODE_ENVIRON)
6670 return export_info
6672 def _ReadExportParams(self, einfo):
6673 """Use export parameters as defaults.
6675 In case the opcode doesn't specify (as in override) some instance
6676 parameters, then try to use them from the export information, if
6677 that declares them.
6679 """
6680 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6682 if self.op.disk_template is None:
6683 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6684 self.op.disk_template = einfo.get(constants.INISECT_INS,
6685 "disk_template")
6686 else:
6687 raise errors.OpPrereqError("No disk template specified and the export"
6688 " is missing the disk_template information",
6691 if not self.op.disks:
6692 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6694 # TODO: import the disk iv_name too
6695 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6696 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6697 disks.append({"size": disk_sz})
6698 self.op.disks = disks
6700 raise errors.OpPrereqError("No disk info specified and the export"
6701 " is missing the disk information",
6704 if (not self.op.nics and
6705 einfo.has_option(constants.INISECT_INS, "nic_count")):
6707 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6709 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6710 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6715 if (self.op.hypervisor is None and
6716 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6717 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6718 if einfo.has_section(constants.INISECT_HYP):
6719 # use the export parameters but do not override the ones
6720 # specified by the user
6721 for name, value in einfo.items(constants.INISECT_HYP):
6722 if name not in self.op.hvparams:
6723 self.op.hvparams[name] = value
6725 if einfo.has_section(constants.INISECT_BEP):
6726 # use the parameters, without overriding
6727 for name, value in einfo.items(constants.INISECT_BEP):
6728 if name not in self.op.beparams:
6729 self.op.beparams[name] = value
6730 else:
6731 # try to read the parameters old style, from the main section
6732 for name in constants.BES_PARAMETERS:
6733 if (name not in self.op.beparams and
6734 einfo.has_option(constants.INISECT_INS, name)):
6735 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6737 if einfo.has_section(constants.INISECT_OSP):
6738 # use the parameters, without overriding
6739 for name, value in einfo.items(constants.INISECT_OSP):
6740 if name not in self.op.osparams:
6741 self.op.osparams[name] = value
6743 def _RevertToDefaults(self, cluster):
6744 """Revert the instance parameters to the default values.
6748 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6749 for name in self.op.hvparams.keys():
6750 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6751 del self.op.hvparams[name]
6753 be_defs = cluster.SimpleFillBE({})
6754 for name in self.op.beparams.keys():
6755 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6756 del self.op.beparams[name]
6758 nic_defs = cluster.SimpleFillNIC({})
6759 for nic in self.op.nics:
6760 for name in constants.NICS_PARAMETERS:
6761 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6762 del nic[name]
6764 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6765 for name in self.op.osparams.keys():
6766 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6767 del self.op.osparams[name]
6769 def CheckPrereq(self):
6770 """Check prerequisites.
6773 if self.op.mode == constants.INSTANCE_IMPORT:
6774 export_info = self._ReadExportInfo()
6775 self._ReadExportParams(export_info)
6777 _CheckDiskTemplate(self.op.disk_template)
6779 if (not self.cfg.GetVGName() and
6780 self.op.disk_template not in constants.DTS_NOT_LVM):
6781 raise errors.OpPrereqError("Cluster does not support lvm-based"
6782 " instances", errors.ECODE_STATE)
6784 if self.op.hypervisor is None:
6785 self.op.hypervisor = self.cfg.GetHypervisorType()
6787 cluster = self.cfg.GetClusterInfo()
6788 enabled_hvs = cluster.enabled_hypervisors
6789 if self.op.hypervisor not in enabled_hvs:
6790 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6791 " cluster (%s)" % (self.op.hypervisor,
6792 ",".join(enabled_hvs)),
6795 # check hypervisor parameter syntax (locally)
6796 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6797 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6798 self.op.hvparams)
6799 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6800 hv_type.CheckParameterSyntax(filled_hvp)
6801 self.hv_full = filled_hvp
6802 # check that we don't specify global parameters on an instance
6803 _CheckGlobalHvParams(self.op.hvparams)
6805 # fill and remember the beparams dict
6806 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6807 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6809 # build os parameters
6810 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6812 # now that hvp/bep are in final format, let's reset to defaults,
6813 # if told to do so
6814 if self.op.identify_defaults:
6815 self._RevertToDefaults(cluster)
6817 # NIC buildup
6818 self.nics = []
6819 for idx, nic in enumerate(self.op.nics):
6820 nic_mode_req = nic.get("mode", None)
6821 nic_mode = nic_mode_req
6822 if nic_mode is None:
6823 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6825 # in routed mode, for the first nic, the default ip is 'auto'
6826 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6827 default_ip_mode = constants.VALUE_AUTO
6829 default_ip_mode = constants.VALUE_NONE
6831 # ip validity checks
6832 ip = nic.get("ip", default_ip_mode)
6833 if ip is None or ip.lower() == constants.VALUE_NONE:
6834 nic_ip = None
6835 elif ip.lower() == constants.VALUE_AUTO:
6836 if not self.op.name_check:
6837 raise errors.OpPrereqError("IP address set to auto but name checks"
6838 " have been skipped. Aborting.",
6840 nic_ip = self.hostname1.ip
6841 else:
6842 if not utils.IsValidIP(ip):
6843 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6844 " like a valid IP" % ip,
6848 # TODO: check the ip address for uniqueness
6849 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6850 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6853 # MAC address verification
6854 mac = nic.get("mac", constants.VALUE_AUTO)
6855 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6856 mac = utils.NormalizeAndValidateMac(mac)
6858 try:
6859 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6860 except errors.ReservationError:
6861 raise errors.OpPrereqError("MAC address %s already in use"
6862 " in cluster" % mac,
6863 errors.ECODE_NOTUNIQUE)
6865 # bridge verification
6866 bridge = nic.get("bridge", None)
6867 link = nic.get("link", None)
6869 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6870 " at the same time", errors.ECODE_INVAL)
6871 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6872 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6879 nicparams[constants.NIC_MODE] = nic_mode_req
6881 nicparams[constants.NIC_LINK] = link
6883 check_params = cluster.SimpleFillNIC(nicparams)
6884 objects.NIC.CheckParameterSyntax(check_params)
6885 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6887 # disk checks/pre-build
6888 self.disks = []
6889 for disk in self.op.disks:
6890 mode = disk.get("mode", constants.DISK_RDWR)
6891 if mode not in constants.DISK_ACCESS_SET:
6892 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6893 mode, errors.ECODE_INVAL)
6894 size = disk.get("size", None)
6896 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6899 except (TypeError, ValueError):
6900 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6902 new_disk = {"size": size, "mode": mode}
6904 new_disk["adopt"] = disk["adopt"]
6905 self.disks.append(new_disk)
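# At this point each entry of self.disks is a normalized dict such as
# (illustrative) {"size": 1024, "mode": constants.DISK_RDWR}, optionally
# carrying an "adopt" key naming the logical volume to take over.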
6907 if self.op.mode == constants.INSTANCE_IMPORT:
6909 # Check that the new instance doesn't have less disks than the export
6910 instance_disks = len(self.disks)
6911 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6912 if instance_disks < export_disks:
6913 raise errors.OpPrereqError("Not enough disks to import."
6914 " (instance: %d, export: %d)" %
6915 (instance_disks, export_disks),
6916 errors.ECODE_INVAL)
6918 disk_images = []
6919 for idx in range(export_disks):
6920 option = 'disk%d_dump' % idx
6921 if export_info.has_option(constants.INISECT_INS, option):
6922 # FIXME: are the old os-es, disk sizes, etc. useful?
6923 export_name = export_info.get(constants.INISECT_INS, option)
6924 image = utils.PathJoin(self.op.src_path, export_name)
6925 disk_images.append(image)
6926 else:
6927 disk_images.append(False)
6929 self.src_images = disk_images
6931 old_name = export_info.get(constants.INISECT_INS, 'name')
6932 try:
6933 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6934 except (TypeError, ValueError), err:
6935 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6936 " an integer: %s" % str(err),
6938 if self.op.instance_name == old_name:
6939 for idx, nic in enumerate(self.nics):
6940 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6941 nic_mac_ini = 'nic%d_mac' % idx
6942 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6944 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6946 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6947 if self.op.ip_check:
6948 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6949 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6950 (self.check_ip, self.op.instance_name),
6951 errors.ECODE_NOTUNIQUE)
6953 #### mac address generation
6954 # By generating here the mac address both the allocator and the hooks get
6955 # the real final mac address rather than the 'auto' or 'generate' value.
6956 # There is a race condition between the generation and the instance object
6957 # creation, which means that we know the mac is valid now, but we're not
6958 # sure it will be when we actually add the instance. If things go bad
6959 # adding the instance will abort because of a duplicate mac, and the
6960 # creation job will fail.
6961 for nic in self.nics:
6962 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6963 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6967 if self.op.iallocator is not None:
6968 self._RunAllocator()
6970 #### node related checks
6972 # check primary node
6973 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6974 assert self.pnode is not None, \
6975 "Cannot retrieve locked node %s" % self.op.pnode
6977 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6978 pnode.name, errors.ECODE_STATE)
6980 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6981 pnode.name, errors.ECODE_STATE)
6983 self.secondaries = []
6985 # mirror node verification
6986 if self.op.disk_template in constants.DTS_NET_MIRROR:
6987 if self.op.snode is None:
6988 raise errors.OpPrereqError("The networked disk templates need"
6989 " a mirror node", errors.ECODE_INVAL)
6990 if self.op.snode == pnode.name:
6991 raise errors.OpPrereqError("The secondary node cannot be the"
6992 " primary node.", errors.ECODE_INVAL)
6993 _CheckNodeOnline(self, self.op.snode)
6994 _CheckNodeNotDrained(self, self.op.snode)
6995 self.secondaries.append(self.op.snode)
6997 nodenames = [pnode.name] + self.secondaries
6999 req_size = _ComputeDiskSize(self.op.disk_template,
7000 self.disks)
7002 # Check lv size requirements, if not adopting
7003 if req_size is not None and not self.adopt_disks:
7004 _CheckNodesFreeDisk(self, nodenames, req_size)
7006 if self.adopt_disks: # instead, we must check the adoption data
7007 all_lvs = set([i["adopt"] for i in self.disks])
7008 if len(all_lvs) != len(self.disks):
7009 raise errors.OpPrereqError("Duplicate volume names given for adoption",
7011 for lv_name in all_lvs:
7012 try:
7013 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7014 except errors.ReservationError:
7015 raise errors.OpPrereqError("LV named %s used by another instance" %
7016 lv_name, errors.ECODE_NOTUNIQUE)
7018 node_lvs = self.rpc.call_lv_list([pnode.name],
7019 self.cfg.GetVGName())[pnode.name]
7020 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7021 node_lvs = node_lvs.payload
7022 delta = all_lvs.difference(node_lvs.keys())
7024 raise errors.OpPrereqError("Missing logical volume(s): %s" %
7025 utils.CommaJoin(delta),
7027 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7029 raise errors.OpPrereqError("Online logical volumes found, cannot"
7030 " adopt: %s" % utils.CommaJoin(online_lvs),
7032 # update the size of disk based on what is found
7033 for dsk in self.disks:
7034 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7036 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7038 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7039 # check OS parameters (remotely)
7040 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7042 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7044 # memory check on primary node
7045 if self.op.start:
7046 _CheckNodeFreeMemory(self, self.pnode.name,
7047 "creating instance %s" % self.op.instance_name,
7048 self.be_full[constants.BE_MEMORY],
7049 self.op.hypervisor)
7051 self.dry_run_result = list(nodenames)
7053 def Exec(self, feedback_fn):
7054 """Create and add the instance to the cluster.
7057 instance = self.op.instance_name
7058 pnode_name = self.pnode.name
7060 ht_kind = self.op.hypervisor
7061 if ht_kind in constants.HTS_REQ_PORT:
7062 network_port = self.cfg.AllocatePort()
7063 else:
7064 network_port = None
7066 if constants.ENABLE_FILE_STORAGE:
7067 # this is needed because os.path.join does not accept None arguments
7068 if self.op.file_storage_dir is None:
7069 string_file_storage_dir = ""
7070 else:
7071 string_file_storage_dir = self.op.file_storage_dir
7073 # build the full file storage dir path
7074 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7075 string_file_storage_dir, instance)
7077 file_storage_dir = ""
7079 disks = _GenerateDiskTemplate(self,
7080 self.op.disk_template,
7081 instance, pnode_name,
7082 self.secondaries,
7083 self.disks,
7084 file_storage_dir,
7085 self.op.file_driver,
7086 0)
7088 iobj = objects.Instance(name=instance, os=self.op.os_type,
7089 primary_node=pnode_name,
7090 nics=self.nics, disks=disks,
7091 disk_template=self.op.disk_template,
7092 admin_up=False,
7093 network_port=network_port,
7094 beparams=self.op.beparams,
7095 hvparams=self.op.hvparams,
7096 hypervisor=self.op.hypervisor,
7097 osparams=self.op.osparams,
7098 )
7100 if self.adopt_disks:
7101 # rename LVs to the newly-generated names; we need to construct
7102 # 'fake' LV disks with the old data, plus the new unique_id
7103 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7104 rename_to = []
7105 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7106 rename_to.append(t_dsk.logical_id)
7107 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7108 self.cfg.SetDiskID(t_dsk, pnode_name)
7109 result = self.rpc.call_blockdev_rename(pnode_name,
7110 zip(tmp_disks, rename_to))
7111 result.Raise("Failed to rename adoped LVs")
7113 feedback_fn("* creating instance disks...")
7114 try:
7115 _CreateDisks(self, iobj)
7116 except errors.OpExecError:
7117 self.LogWarning("Device creation failed, reverting...")
7118 try:
7119 _RemoveDisks(self, iobj)
7120 finally:
7121 self.cfg.ReleaseDRBDMinors(instance)
7122 raise
7124 feedback_fn("adding instance %s to cluster config" % instance)
7126 self.cfg.AddInstance(iobj, self.proc.GetECId())
7128 # Declare that we don't want to remove the instance lock anymore, as we've
7129 # added the instance to the config
7130 del self.remove_locks[locking.LEVEL_INSTANCE]
7131 # Unlock all the nodes
7132 if self.op.mode == constants.INSTANCE_IMPORT:
7133 nodes_keep = [self.op.src_node]
7134 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7135 if node != self.op.src_node]
7136 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7137 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7139 self.context.glm.release(locking.LEVEL_NODE)
7140 del self.acquired_locks[locking.LEVEL_NODE]
7142 if self.op.wait_for_sync:
7143 disk_abort = not _WaitForSync(self, iobj)
7144 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7145 # make sure the disks are not degraded (still sync-ing is ok)
7147 feedback_fn("* checking mirrors status")
7148 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7153 _RemoveDisks(self, iobj)
7154 self.cfg.RemoveInstance(iobj.name)
7155 # Make sure the instance lock gets removed
7156 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7157 raise errors.OpExecError("There are some degraded disks for"
7160 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7161 if self.op.mode == constants.INSTANCE_CREATE:
7162 if not self.op.no_install:
7163 feedback_fn("* running the instance OS create scripts...")
7164 # FIXME: pass debug option from opcode to backend
7165 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7166 self.op.debug_level)
7167 result.Raise("Could not add os for instance %s"
7168 " on node %s" % (instance, pnode_name))
7170 elif self.op.mode == constants.INSTANCE_IMPORT:
7171 feedback_fn("* running the instance OS import scripts...")
7173 transfers = []
7175 for idx, image in enumerate(self.src_images):
7176 if not image:
7177 continue
7179 # FIXME: pass debug option from opcode to backend
7180 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7181 constants.IEIO_FILE, (image, ),
7182 constants.IEIO_SCRIPT,
7183 (iobj.disks[idx], idx),
7184 None)
7185 transfers.append(dt)
7187 import_result = \
7188 masterd.instance.TransferInstanceData(self, feedback_fn,
7189 self.op.src_node, pnode_name,
7190 self.pnode.secondary_ip,
7191 iobj, transfers)
7192 if not compat.all(import_result):
7193 self.LogWarning("Some disks for instance %s on node %s were not"
7194 " imported successfully" % (instance, pnode_name))
7196 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7197 feedback_fn("* preparing remote import...")
7198 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7199 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7201 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7202 self.source_x509_ca,
7203 self._cds, timeouts)
7204 if not compat.all(disk_results):
7205 # TODO: Should the instance still be started, even if some disks
7206 # failed to import (valid for local imports, too)?
7207 self.LogWarning("Some disks for instance %s on node %s were not"
7208 " imported successfully" % (instance, pnode_name))
7210 # Run rename script on newly imported instance
7211 assert iobj.name == instance
7212 feedback_fn("Running rename script for %s" % instance)
7213 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7214 self.source_instance_name,
7215 self.op.debug_level)
7217 self.LogWarning("Failed to run rename script for %s on node"
7218 " %s: %s" % (instance, pnode_name, result.fail_msg))
7220 else:
7221 # also checked in the prereq part
7222 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7226 iobj.admin_up = True
7227 self.cfg.Update(iobj, feedback_fn)
7228 logging.info("Starting instance %s on node %s", instance, pnode_name)
7229 feedback_fn("* starting instance...")
7230 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7231 result.Raise("Could not start instance")
7233 return list(iobj.all_nodes)
7236 class LUConnectConsole(NoHooksLU):
7237 """Connect to an instance's console.
7239 This is somewhat special in that it returns the command line that
7240 you need to run on the master node in order to connect to the
7241 console.
7243 """
7244 _OP_REQP = [("instance_name", _TNonEmptyString)]
7247 def ExpandNames(self):
7248 self._ExpandAndLockInstance()
7250 def CheckPrereq(self):
7251 """Check prerequisites.
7253 This checks that the instance is in the cluster.
7255 """
7256 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7257 assert self.instance is not None, \
7258 "Cannot retrieve locked instance %s" % self.op.instance_name
7259 _CheckNodeOnline(self, self.instance.primary_node)
7261 def Exec(self, feedback_fn):
7262 """Connect to the console of an instance
7265 instance = self.instance
7266 node = instance.primary_node
7268 node_insts = self.rpc.call_instance_list([node],
7269 [instance.hypervisor])[node]
7270 node_insts.Raise("Can't get node information from %s" % node)
7272 if instance.name not in node_insts.payload:
7273 raise errors.OpExecError("Instance %s is not running." % instance.name)
7275 logging.debug("Connecting to console of %s on %s", instance.name, node)
7277 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7278 cluster = self.cfg.GetClusterInfo()
7279 # beparams and hvparams are passed separately, to avoid editing the
7280 # instance and then saving the defaults in the instance itself.
7281 hvparams = cluster.FillHV(instance)
7282 beparams = cluster.FillBE(instance)
7283 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7286 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7289 class LUReplaceDisks(LogicalUnit):
7290 """Replace the disks of an instance.
7293 HPATH = "mirrors-replace"
7294 HTYPE = constants.HTYPE_INSTANCE
7296 ("instance_name", _TNonEmptyString),
7297 ("mode", _TElemOf(constants.REPLACE_MODES)),
7298 ("disks", _TListOf(_TPositiveInt)),
7301 ("remote_node", None),
7302 ("iallocator", None),
7303 ("early_release", None),
7307 def CheckArguments(self):
7308 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7309 self.op.iallocator)
7311 def ExpandNames(self):
7312 self._ExpandAndLockInstance()
7314 if self.op.iallocator is not None:
7315 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7317 elif self.op.remote_node is not None:
7318 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7319 self.op.remote_node = remote_node
7321 # Warning: do not remove the locking of the new secondary here
7322 # unless DRBD8.AddChildren is changed to work in parallel;
7323 # currently it doesn't since parallel invocations of
7324 # FindUnusedMinor will conflict
7325 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7326 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7328 else:
7329 self.needed_locks[locking.LEVEL_NODE] = []
7330 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7332 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7333 self.op.iallocator, self.op.remote_node,
7334 self.op.disks, False, self.op.early_release)
7336 self.tasklets = [self.replacer]
7338 def DeclareLocks(self, level):
7339 # If we're not already locking all nodes in the set we have to declare the
7340 # instance's primary/secondary nodes.
7341 if (level == locking.LEVEL_NODE and
7342 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7343 self._LockInstancesNodes()
7345 def BuildHooksEnv(self):
7346 """Build hooks env.
7348 This runs on the master, the primary and all the secondaries.
7350 """
7351 instance = self.replacer.instance
7353 "MODE": self.op.mode,
7354 "NEW_SECONDARY": self.op.remote_node,
7355 "OLD_SECONDARY": instance.secondary_nodes[0],
7357 env.update(_BuildInstanceHookEnvByObject(self, instance))
7358 nl = [
7359 self.cfg.GetMasterNode(),
7360 instance.primary_node,
7361 ]
7362 if self.op.remote_node is not None:
7363 nl.append(self.op.remote_node)
7364 return env, nl, nl
7367 class TLReplaceDisks(Tasklet):
7368 """Replaces disks for an instance.
7370 Note: Locking is not within the scope of this class.
7372 """
7373 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7374 disks, delay_iallocator, early_release):
7375 """Initializes this class.
7378 Tasklet.__init__(self, lu)
7381 self.instance_name = instance_name
7383 self.iallocator_name = iallocator_name
7384 self.remote_node = remote_node
7385 self.disks = disks
7386 self.delay_iallocator = delay_iallocator
7387 self.early_release = early_release
7389 # Runtime data
7390 self.instance = None
7391 self.new_node = None
7392 self.target_node = None
7393 self.other_node = None
7394 self.remote_node_info = None
7395 self.node_secondary_ip = None
7397 @staticmethod
7398 def CheckArguments(mode, remote_node, iallocator):
7399 """Helper function for users of this class.
7402 # check for valid parameter combination
7403 if mode == constants.REPLACE_DISK_CHG:
7404 if remote_node is None and iallocator is None:
7405 raise errors.OpPrereqError("When changing the secondary either an"
7406 " iallocator script must be used or the"
7407 " new node given", errors.ECODE_INVAL)
7409 if remote_node is not None and iallocator is not None:
7410 raise errors.OpPrereqError("Give either the iallocator or the new"
7411 " secondary, not both", errors.ECODE_INVAL)
7413 elif remote_node is not None or iallocator is not None:
7414 # Not replacing the secondary
7415 raise errors.OpPrereqError("The iallocator and new node options can"
7416 " only be used when changing the"
7417 " secondary node", errors.ECODE_INVAL)
7419 @staticmethod
7420 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7421 """Compute a new secondary node using an IAllocator.
7424 ial = IAllocator(lu.cfg, lu.rpc,
7425 mode=constants.IALLOCATOR_MODE_RELOC,
7426 name=instance_name,
7427 relocate_from=relocate_from)
7429 ial.Run(iallocator_name)
7432 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7433 " %s" % (iallocator_name, ial.info),
7436 if len(ial.result) != ial.required_nodes:
7437 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7438 " of nodes (%s), required %s" %
7439 (iallocator_name,
7440 len(ial.result), ial.required_nodes),
7441 errors.ECODE_FAULT)
7443 remote_node_name = ial.result[0]
7445 lu.LogInfo("Selected new secondary for instance '%s': %s",
7446 instance_name, remote_node_name)
7448 return remote_node_name
7450 def _FindFaultyDisks(self, node_name):
7451 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7452 node_name, True)
7454 def CheckPrereq(self):
7455 """Check prerequisites.
7457 This checks that the instance is in the cluster.
7459 """
7460 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7461 assert instance is not None, \
7462 "Cannot retrieve locked instance %s" % self.instance_name
7464 if instance.disk_template != constants.DT_DRBD8:
7465 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7466 " instances", errors.ECODE_INVAL)
7468 if len(instance.secondary_nodes) != 1:
7469 raise errors.OpPrereqError("The instance has a strange layout,"
7470 " expected one secondary but found %d" %
7471 len(instance.secondary_nodes),
7472 errors.ECODE_FAULT)
7474 if not self.delay_iallocator:
7475 self._CheckPrereq2()
7477 def _CheckPrereq2(self):
7478 """Check prerequisites, second part.
7480 This function should always be part of CheckPrereq. It was separated and is
7481 now called from Exec because during node evacuation iallocator was only
7482 called with an unmodified cluster model, not taking planned changes into
7483 account.
7485 """
7486 instance = self.instance
7487 secondary_node = instance.secondary_nodes[0]
7489 if self.iallocator_name is None:
7490 remote_node = self.remote_node
7491 else:
7492 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7493 instance.name, instance.secondary_nodes)
7495 if remote_node is not None:
7496 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7497 assert self.remote_node_info is not None, \
7498 "Cannot retrieve locked node %s" % remote_node
7499 else:
7500 self.remote_node_info = None
7502 if remote_node == self.instance.primary_node:
7503 raise errors.OpPrereqError("The specified node is the primary node of"
7504 " the instance.", errors.ECODE_INVAL)
7506 if remote_node == secondary_node:
7507 raise errors.OpPrereqError("The specified node is already the"
7508 " secondary node of the instance.",
7511 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7512 constants.REPLACE_DISK_CHG):
7513 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7516 if self.mode == constants.REPLACE_DISK_AUTO:
7517 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7518 faulty_secondary = self._FindFaultyDisks(secondary_node)
7520 if faulty_primary and faulty_secondary:
7521 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7522 " one node and can not be repaired"
7523 " automatically" % self.instance_name,
7527 self.disks = faulty_primary
7528 self.target_node = instance.primary_node
7529 self.other_node = secondary_node
7530 check_nodes = [self.target_node, self.other_node]
7531 elif faulty_secondary:
7532 self.disks = faulty_secondary
7533 self.target_node = secondary_node
7534 self.other_node = instance.primary_node
7535 check_nodes = [self.target_node, self.other_node]
7536 else:
7537 self.disks = []
7538 check_nodes = []
7540 else:
7541 # Non-automatic modes
7542 if self.mode == constants.REPLACE_DISK_PRI:
7543 self.target_node = instance.primary_node
7544 self.other_node = secondary_node
7545 check_nodes = [self.target_node, self.other_node]
7547 elif self.mode == constants.REPLACE_DISK_SEC:
7548 self.target_node = secondary_node
7549 self.other_node = instance.primary_node
7550 check_nodes = [self.target_node, self.other_node]
7552 elif self.mode == constants.REPLACE_DISK_CHG:
7553 self.new_node = remote_node
7554 self.other_node = instance.primary_node
7555 self.target_node = secondary_node
7556 check_nodes = [self.new_node, self.other_node]
7558 _CheckNodeNotDrained(self.lu, remote_node)
7560 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7561 assert old_node_info is not None
7562 if old_node_info.offline and not self.early_release:
7563 # doesn't make sense to delay the release
7564 self.early_release = True
7565 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7566 " early-release mode", secondary_node)
7569 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7572 # If not specified all disks should be replaced
7573 if not self.disks:
7574 self.disks = range(len(self.instance.disks))
7576 for node in check_nodes:
7577 _CheckNodeOnline(self.lu, node)
7579 # Check whether disks are valid
7580 for disk_idx in self.disks:
7581 instance.FindDisk(disk_idx)
7583 # Get secondary node IP addresses
7585 node_2nd_ip = {}
7586 for node_name in [self.target_node, self.other_node, self.new_node]:
7587 if node_name is not None:
7588 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7590 self.node_secondary_ip = node_2nd_ip
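# Illustrative summary (not part of the original code): the node roles
# assigned above per replacement mode, as a lookup table; REPLACE_DISK_AUTO
# picks the faulty side at runtime instead.
#
#   _ROLES_BY_MODE = {
#       constants.REPLACE_DISK_PRI: ("primary", "secondary", None),
#       constants.REPLACE_DISK_SEC: ("secondary", "primary", None),
#       constants.REPLACE_DISK_CHG: ("secondary", "primary", "remote_node"),
#   }   # values are (target_node, other_node, new_node)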
7592 def Exec(self, feedback_fn):
7593 """Execute disk replacement.
7595 This dispatches the disk replacement to the appropriate handler.
7597 """
7598 if self.delay_iallocator:
7599 self._CheckPrereq2()
7602 feedback_fn("No disks need replacement")
7605 feedback_fn("Replacing disk(s) %s for %s" %
7606 (utils.CommaJoin(self.disks), self.instance.name))
7608 activate_disks = (not self.instance.admin_up)
7610 # Activate the instance disks if we're replacing them on a down instance
7611 if activate_disks:
7612 _StartInstanceDisks(self.lu, self.instance, True)
7614 try:
7615 # Should we replace the secondary node?
7616 if self.new_node is not None:
7617 fn = self._ExecDrbd8Secondary
7618 else:
7619 fn = self._ExecDrbd8DiskOnly
7621 return fn(feedback_fn)
7623 finally:
7624 # Deactivate the instance disks if we're replacing them on a
7625 # down instance
7626 if activate_disks:
7627 _SafeShutdownInstanceDisks(self.lu, self.instance)
7629 def _CheckVolumeGroup(self, nodes):
7630 self.lu.LogInfo("Checking volume groups")
7632 vgname = self.cfg.GetVGName()
7634 # Make sure volume group exists on all involved nodes
7635 results = self.rpc.call_vg_list(nodes)
7637 raise errors.OpExecError("Can't list volume groups on the nodes")
7641 res.Raise("Error checking node %s" % node)
7642 if vgname not in res.payload:
7643 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7646 def _CheckDisksExistence(self, nodes):
7647 # Check disk existence
7648 for idx, dev in enumerate(self.instance.disks):
7649 if idx not in self.disks:
7650 continue
7652 for node in nodes:
7653 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7654 self.cfg.SetDiskID(dev, node)
7656 result = self.rpc.call_blockdev_find(node, dev)
7658 msg = result.fail_msg
7659 if msg or not result.payload:
7661 msg = "disk not found"
7662 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
  for idx, dev in enumerate(self.instance.disks):
    if idx not in self.disks:
      continue

    self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                    (idx, node_name))

    if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                 ldisk=ldisk):
      raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                               " replace disks for instance %s" %
                               (node_name, self.instance.name))
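# Note: ldisk=True presumably restricts the consistency check to the local
# disk status, rather than the overall (network) mirror status.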
def _CreateNewStorage(self, node_name):
  """Create new storage on the primary or secondary node.

  """
  vgname = self.cfg.GetVGName()
  iv_names = {}

  for idx, dev in enumerate(self.instance.disks):
    if idx not in self.disks:
      continue

    self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

    self.cfg.SetDiskID(dev, node_name)

    lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
    names = _GenerateUniqueNames(self.lu, lv_names)

    lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                           logical_id=(vgname, names[0]))
    lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                           logical_id=(vgname, names[1]))

    new_lvs = [lv_data, lv_meta]
    old_lvs = dev.children
    iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

    # we pass force_create=True to force the LVM creation
    for new_lv in new_lvs:
      _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                      _GetInstanceInfoText(self.instance), False)

  return iv_names
def _CheckDevices(self, node_name, iv_names):
  for name, (dev, _, _) in iv_names.iteritems():
    self.cfg.SetDiskID(dev, node_name)

    result = self.rpc.call_blockdev_find(node_name, dev)

    msg = result.fail_msg
    if msg or not result.payload:
      if not msg:
        msg = "disk not found"
      raise errors.OpExecError("Can't find DRBD device %s: %s" %
                               (name, msg))

    if result.payload.is_degraded:
      raise errors.OpExecError("DRBD device %s is degraded!" % name)
def _RemoveOldStorage(self, node_name, iv_names):
  for name, (_, old_lvs, _) in iv_names.iteritems():
    self.lu.LogInfo("Remove logical volumes for %s" % name)

    for lv in old_lvs:
      self.cfg.SetDiskID(lv, node_name)

      msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
      if msg:
        self.lu.LogWarning("Can't remove old LV: %s" % msg,
                           hint="remove unused LVs manually")
7738 def _ReleaseNodeLock(self, node_name):
7739 """Releases the lock for a given node."""
7740 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
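# Note: the early-release paths below pass a list of node names here; the
# lock manager's release() is assumed to accept a list as well as a single
# name.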
7742 def _ExecDrbd8DiskOnly(self, feedback_fn):
7743 """Replace a disk on the primary or secondary for DRBD 8.
7745 The algorithm for replace is quite complicated:
7747 1. for each disk to be replaced:
7749 1. create new LVs on the target node with unique names
7750 1. detach old LVs from the drbd device
7751 1. rename old LVs to name_replaced.<time_t>
7752 1. rename new LVs to old LVs
7753 1. attach the new LVs (with the old names now) to the drbd device
7755 1. wait for sync across all devices
7757 1. for each modified disk:
1. remove old LVs (which have the name name_replaced.<time_t>)
Failures are not very well handled.

"""
steps_total = 6

# Step: check device activation
7767 self.lu.LogStep(1, steps_total, "Check device existence")
7768 self._CheckDisksExistence([self.other_node, self.target_node])
7769 self._CheckVolumeGroup([self.target_node, self.other_node])
7771 # Step: check other node consistency
7772 self.lu.LogStep(2, steps_total, "Check peer consistency")
self._CheckDisksConsistency(self.other_node,
                            self.other_node == self.instance.primary_node,
                            False)
7777 # Step: create new storage
7778 self.lu.LogStep(3, steps_total, "Allocate new storage")
7779 iv_names = self._CreateNewStorage(self.target_node)
7781 # Step: for each lv, detach+rename*2+attach
7782 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7783 for dev, old_lvs, new_lvs in iv_names.itervalues():
7784 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                               old_lvs)
result.Raise("Can't detach drbd from local storage on node"
             " %s for device %s" % (self.target_node, dev.iv_name))
7791 #cfg.Update(instance)
7793 # ok, we created the new LVs, so now we know we have the needed
7794 # storage; as such, we proceed on the target node to rename
7795 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7796 # using the assumption that logical_id == physical_id (which in
7797 # turn is the unique_id on that node)
7799 # FIXME(iustin): use a better name for the replaced LVs
7800 temp_suffix = int(time.time())
7801 ren_fn = lambda d, suff: (d.physical_id[0],
7802 d.physical_id[1] + "_replaced-%s" % suff)
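# e.g. ("xenvg", "disk0_data") -> ("xenvg", "disk0_data_replaced-1332341231")
# (volume group and LV names above are purely illustrative)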
7804 # Build the rename list based on what LVs exist on the node
7805 rename_old_to_new = []
7806 for to_ren in old_lvs:
7807 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7808 if not result.fail_msg and result.payload:
7810 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7812 self.lu.LogInfo("Renaming the old LVs on the target node")
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_old_to_new)
result.Raise("Can't rename old LVs on node %s" % self.target_node)
7817 # Now we rename the new LVs to the old LVs
7818 self.lu.LogInfo("Renaming the new LVs on the target node")
7819 rename_new_to_old = [(new, old.physical_id)
7820 for old, new in zip(old_lvs, new_lvs)]
result = self.rpc.call_blockdev_rename(self.target_node,
                                       rename_new_to_old)
result.Raise("Can't rename new LVs on node %s" % self.target_node)
7825 for old, new in zip(old_lvs, new_lvs):
7826 new.logical_id = old.logical_id
7827 self.cfg.SetDiskID(new, self.target_node)
7829 for disk in old_lvs:
7830 disk.logical_id = ren_fn(disk, temp_suffix)
7831 self.cfg.SetDiskID(disk, self.target_node)
7833 # Now that the new lvs have the old name, we can add them to the device
7834 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                            new_lvs)
msg = result.fail_msg
if msg:
  for new_lv in new_lvs:
    msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                         new_lv).fail_msg
    if msg2:
      self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                         hint=("cleanup manually the unused logical"
                               " volumes"))
  raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7848 dev.children = new_lvs
self.cfg.Update(self.instance, feedback_fn)

cstep = 5
if self.early_release:
  self.lu.LogStep(cstep, steps_total, "Removing old storage")
  cstep += 1
  self._RemoveOldStorage(self.target_node, iv_names)
  # WARNING: we release both node locks here, do not do other RPCs
  # than WaitForSync to the primary node
  self._ReleaseNodeLock([self.target_node, self.other_node])
7862 # This can fail as the old devices are degraded and _WaitForSync
7863 # does a combined result over all disks, so we don't check its return value
self.lu.LogStep(cstep, steps_total, "Sync devices")
cstep += 1
_WaitForSync(self.lu, self.instance)
7868 # Check all devices manually
7869 self._CheckDevices(self.instance.primary_node, iv_names)
7871 # Step: remove old storage
7872 if not self.early_release:
self.lu.LogStep(cstep, steps_total, "Removing old storage")
cstep += 1
self._RemoveOldStorage(self.target_node, iv_names)
7877 def _ExecDrbd8Secondary(self, feedback_fn):
7878 """Replace the secondary node for DRBD 8.
7880 The algorithm for replace is quite complicated:
7881 - for all disks of the instance:
7882 - create new LVs on the new node with same names
7883 - shutdown the drbd device on the old secondary
7884 - disconnect the drbd network on the primary
7885 - create the drbd device on the new secondary
7886 - network attach the drbd on the primary, using an artifice:
7887 the drbd code for Attach() will connect to the network if it
finds a device which is connected to the good local disks but
not network enabled
7890 - wait for sync across all devices
7891 - remove all disks from the old secondary
Failures are not very well handled.

"""
steps_total = 6

# Step: check device activation
7899 self.lu.LogStep(1, steps_total, "Check device existence")
7900 self._CheckDisksExistence([self.instance.primary_node])
7901 self._CheckVolumeGroup([self.instance.primary_node])
7903 # Step: check other node consistency
7904 self.lu.LogStep(2, steps_total, "Check peer consistency")
7905 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7907 # Step: create new storage
7908 self.lu.LogStep(3, steps_total, "Allocate new storage")
7909 for idx, dev in enumerate(self.instance.disks):
7910 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7911 (self.new_node, idx))
7912 # we pass force_create=True to force LVM creation
7913 for new_lv in dev.children:
7914 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7915 _GetInstanceInfoText(self.instance), False)
# Step 4: change drbd minors and drbd setup
7918 # after this, we must manually remove the drbd minors on both the
7919 # error and the success paths
7920 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
minors = self.cfg.AllocateDRBDMinor([self.new_node
                                     for dev in self.instance.disks],
                                    self.instance.name)
logging.debug("Allocated minors %r", minors)

iv_names = {}
7927 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7928 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7929 (self.new_node, idx))
7930 # create new devices on new_node; note that we create two IDs:
7931 # one without port, so the drbd will be activated without
7932 # networking information on the new node at this stage, and one
7933 # with network, for the latter activation in step 4
7934 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
  p_minor = o_minor1
else:
  assert self.instance.primary_node == o_node2, "Three-node instance?"
  p_minor = o_minor2
7941 new_alone_id = (self.instance.primary_node, self.new_node, None,
7942 p_minor, new_minor, o_secret)
7943 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7944 p_minor, new_minor, o_secret)
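# Reminder: the LD_DRBD8 logical_id layout, as unpacked above, is
#   (node_A, node_B, port, minor_A, minor_B, secret);
# new_alone_id has port=None so the device activates without networking,
# while new_net_id carries the full network information for step 4.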
7946 iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
              new_net_id)
new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                        logical_id=new_alone_id,
                        children=dev.children,
                        size=dev.size)
try:
  _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                        _GetInstanceInfoText(self.instance), False)
except errors.GenericError:
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise
7960 # We have new devices, shutdown the drbd on the old secondary
7961 for idx, dev in enumerate(self.instance.disks):
7962 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7963 self.cfg.SetDiskID(dev, self.target_node)
msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
if msg:
  self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                     " node: %s" % (idx, msg),
                     hint=("Please cleanup this device manually as"
                           " soon as possible"))
7971 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7972 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7973 self.node_secondary_ip,
7974 self.instance.disks)\
7975 [self.instance.primary_node]
msg = result.fail_msg
if msg:
  # detaches didn't succeed (unlikely)
  self.cfg.ReleaseDRBDMinors(self.instance.name)
  raise errors.OpExecError("Can't detach the disks from the network on"
                           " old node: %s" % (msg,))
7984 # if we managed to detach at least one, we update all the disks of
7985 # the instance to point to the new secondary
7986 self.lu.LogInfo("Updating instance configuration")
7987 for dev, _, new_logical_id in iv_names.itervalues():
7988 dev.logical_id = new_logical_id
7989 self.cfg.SetDiskID(dev, self.instance.primary_node)
7991 self.cfg.Update(self.instance, feedback_fn)
7993 # and now perform the drbd attach
7994 self.lu.LogInfo("Attaching primary drbds to new secondary"
7995 " (standalone => connected)")
result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                        self.new_node],
                                       self.node_secondary_ip,
                                       self.instance.disks,
                                       self.instance.name,
                                       False)
for to_node, to_result in result.items():
  msg = to_result.fail_msg
  if msg:
    self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                       to_node, msg,
                       hint=("please do a gnt-instance info to see the"
                             " status of disks"))
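# Note: attach failures are only warnings here; the operator is pointed at
# gnt-instance info (see the hint above) and can recover manually.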
cstep = 5
if self.early_release:
  self.lu.LogStep(cstep, steps_total, "Removing old storage")
  cstep += 1
  self._RemoveOldStorage(self.target_node, iv_names)
  # WARNING: we release all node locks here, do not do other RPCs
  # than WaitForSync to the primary node
  self._ReleaseNodeLock([self.instance.primary_node,
                         self.target_node,
                         self.new_node])
8021 # This can fail as the old devices are degraded and _WaitForSync
8022 # does a combined result over all disks, so we don't check its return value
self.lu.LogStep(cstep, steps_total, "Sync devices")
cstep += 1
_WaitForSync(self.lu, self.instance)
8027 # Check all devices manually
8028 self._CheckDevices(self.instance.primary_node, iv_names)
8030 # Step: remove old storage
8031 if not self.early_release:
8032 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8033 self._RemoveOldStorage(self.target_node, iv_names)
8036 class LURepairNodeStorage(NoHooksLU):
"""Repairs the volume group on a node.

"""
_OP_REQP = [("node_name", _TNonEmptyString)]
REQ_BGL = False
8043 def CheckArguments(self):
8044 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8046 _CheckStorageType(self.op.storage_type)
8048 storage_type = self.op.storage_type
8050 if (constants.SO_FIX_CONSISTENCY not in
8051 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
raise errors.OpPrereqError("Storage units of type '%s' can not be"
                           " repaired" % storage_type,
                           errors.ECODE_INVAL)
8056 def ExpandNames(self):
self.needed_locks = {
  locking.LEVEL_NODE: [self.op.node_name],
  }
8061 def _CheckFaultyDisks(self, instance, node_name):
8062 """Ensure faulty disks abort the opcode or at least warn."""
try:
  if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                              node_name, True):
    raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                               " node '%s'" % (instance.name, node_name),
                               errors.ECODE_STATE)
except errors.OpPrereqError, err:
  if self.op.ignore_consistency:
    self.proc.LogWarning(str(err.args[0]))
  else:
    raise
8075 def CheckPrereq(self):
"""Check prerequisites.

"""
# Check whether any instance on this node has faulty disks
8080 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
if not inst.admin_up:
  continue
8083 check_nodes = set(inst.all_nodes)
8084 check_nodes.discard(self.op.node_name)
8085 for inst_node_name in check_nodes:
8086 self._CheckFaultyDisks(inst, inst_node_name)
8088 def Exec(self, feedback_fn):
8089 feedback_fn("Repairing storage unit '%s' on %s ..." %
8090 (self.op.name, self.op.node_name))
8092 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
result = self.rpc.call_storage_execute(self.op.node_name,
                                       self.op.storage_type, st_args,
                                       self.op.name,
                                       constants.SO_FIX_CONSISTENCY)
8097 result.Raise("Failed to repair storage unit '%s' on %s" %
8098 (self.op.name, self.op.node_name))
8101 class LUNodeEvacuationStrategy(NoHooksLU):
"""Computes the node evacuation strategy.

"""
_OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
_OP_DEFS = [
  ("remote_node", None),
  ("iallocator", None),
  ]
REQ_BGL = False
8112 def CheckArguments(self):
8113 if self.op.remote_node is not None and self.op.iallocator is not None:
8114 raise errors.OpPrereqError("Give either the iallocator or the new"
8115 " secondary, not both", errors.ECODE_INVAL)
8117 def ExpandNames(self):
8118 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8119 self.needed_locks = locks = {}
8120 if self.op.remote_node is None:
8121 locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
  self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
  locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
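# With an explicit remote node only the involved nodes need to be locked;
# with an iallocator all nodes must be locked, since the destination is
# not known yet.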
def Exec(self, feedback_fn):
  if self.op.remote_node is not None:
    instances = []
    for node in self.op.nodes:
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))

    result = []
    for i in instances:
      if i.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, i.name),
                                   errors.ECODE_INVAL)
      result.append([i.name, self.op.remote_node])
  else:
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_MEVAC,
                     evac_nodes=self.op.nodes)
    ial.Run(self.op.iallocator, validate=True)
    if not ial.success:
      raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                               errors.ECODE_NORES)
    result = ial.result

  return result
8152 class LUGrowDisk(LogicalUnit):
"""Grow a disk of an instance.

"""
HPATH = "disk-grow"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = [
  ("instance_name", _TNonEmptyString),
  ("disk", _TInt),
  ("amount", _TInt),
  ("wait_for_sync", _TBool),
  ]
REQ_BGL = False
8166 def ExpandNames(self):
8167 self._ExpandAndLockInstance()
8168 self.needed_locks[locking.LEVEL_NODE] = []
8169 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8171 def DeclareLocks(self, level):
8172 if level == locking.LEVEL_NODE:
8173 self._LockInstancesNodes()
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on the master, the primary and all the secondaries.

  """
  env = {
    "DISK": self.op.disk,
    "AMOUNT": self.op.amount,
    }
  env.update(_BuildInstanceHookEnvByObject(self, self.instance))
  nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
  return env, nl, nl
8189 def CheckPrereq(self):
"""Check prerequisites.

This checks that the instance is in the cluster.

"""
8195 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8196 assert instance is not None, \
8197 "Cannot retrieve locked instance %s" % self.op.instance_name
8198 nodenames = list(instance.all_nodes)
8199 for node in nodenames:
8200 _CheckNodeOnline(self, node)
8202 self.instance = instance
8204 if instance.disk_template not in constants.DTS_GROWABLE:
8205 raise errors.OpPrereqError("Instance's disk layout does not support"
8206 " growing.", errors.ECODE_INVAL)
8208 self.disk = instance.FindDisk(self.op.disk)
8210 if instance.disk_template != constants.DT_FILE:
# TODO: check the free disk space for file, when that feature will be
# implemented
_CheckNodesFreeDisk(self, nodenames, self.op.amount)
8215 def Exec(self, feedback_fn):
"""Execute disk grow.

"""
instance = self.instance
disk = self.disk

disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
if not disks_ok:
  raise errors.OpExecError("Cannot activate block device to grow")
8226 for node in instance.all_nodes:
8227 self.cfg.SetDiskID(disk, node)
8228 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8229 result.Raise("Grow request failed to node %s" % node)
8231 # TODO: Rewrite code to work properly
8232 # DRBD goes into sync mode for a short amount of time after executing the
8233 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8234 # calling "resize" in sync mode fails. Sleeping for a short amount of
# time is a work-around.
time.sleep(5)

disk.RecordGrow(self.op.amount)
8239 self.cfg.Update(instance, feedback_fn)
8240 if self.op.wait_for_sync:
disk_abort = not _WaitForSync(self, instance, disks=[disk])
if disk_abort:
  self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                       " status.\nPlease check the instance.")
  if not instance.admin_up:
    _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8247 elif not instance.admin_up:
8248 self.proc.LogWarning("Not shutting down the disk even if the instance is"
8249 " not supposed to be running because no wait for"
8250 " sync mode was requested.")
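# Note: without wait_for_sync the disk is left activated even when the
# instance is administratively down, as warned about above.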
8253 class LUQueryInstanceData(NoHooksLU):
"""Query runtime instance data.

"""
_OP_REQP = [
  ("instances", _TListOf(_TNonEmptyString)),
  ("static", _TBool),
  ]
REQ_BGL = False
8263 def ExpandNames(self):
8264 self.needed_locks = {}
8265 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8267 if self.op.instances:
8268 self.wanted_names = []
8269 for name in self.op.instances:
8270 full_name = _ExpandInstanceName(self.cfg, name)
8271 self.wanted_names.append(full_name)
self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
else:
  self.wanted_names = None
  self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8277 self.needed_locks[locking.LEVEL_NODE] = []
8278 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8280 def DeclareLocks(self, level):
8281 if level == locking.LEVEL_NODE:
8282 self._LockInstancesNodes()
8284 def CheckPrereq(self):
8285 """Check prerequisites.
This only checks the optional instance list against the existing names.

"""
8290 if self.wanted_names is None:
8291 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8293 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8294 in self.wanted_names]
def _ComputeBlockdevStatus(self, node, instance_name, dev):
  """Returns the status of a block device.

  """
  if self.op.static or not node:
    return None

  self.cfg.SetDiskID(dev, node)

  result = self.rpc.call_blockdev_find(node, dev)
  if result.offline:
    return None

  result.Raise("Can't compute disk status for %s" % instance_name)

  status = result.payload
  if status is None:
    return None

  return (status.dev_path, status.major, status.minor,
          status.sync_percent, status.estimated_time,
          status.is_degraded, status.ldisk_status)
8319 def _ComputeDiskStatus(self, instance, snode, dev):
"""Compute block device status.

"""
if dev.dev_type in constants.LDS_DRBD:
  # we change the snode then (otherwise we use the one passed in)
  if dev.logical_id[0] == instance.primary_node:
    snode = dev.logical_id[1]
  else:
    snode = dev.logical_id[0]

dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                          instance.name, dev)
dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

if dev.children:
  dev_children = [self._ComputeDiskStatus(instance, snode, child)
                  for child in dev.children]
else:
  dev_children = []

return {
  "iv_name": dev.iv_name,
  "dev_type": dev.dev_type,
  "logical_id": dev.logical_id,
  "physical_id": dev.physical_id,
  "pstatus": dev_pstatus,
  "sstatus": dev_sstatus,
  "children": dev_children,
  "mode": dev.mode,
  "size": dev.size,
  }
8354 def Exec(self, feedback_fn):
"""Gather and return data"""
result = {}

cluster = self.cfg.GetClusterInfo()

for instance in self.wanted_instances:
  if not self.op.static:
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    remote_info = remote_info.payload
    if remote_info and "state" in remote_info:
      remote_state = "up"
    else:
      remote_state = "down"
  else:
    remote_state = None
  if instance.admin_up:
    config_state = "up"
  else:
    config_state = "down"
8378 disks = [self._ComputeDiskStatus(instance, None, device)
8379 for device in instance.disks]
idict = {
  "name": instance.name,
  "config_state": config_state,
  "run_state": remote_state,
  "pnode": instance.primary_node,
  "snodes": instance.secondary_nodes,
  "os": instance.os,
  # this happens to be the same format used for hooks
  "nics": _NICListToTuple(self, instance.nics),
  "disk_template": instance.disk_template,
  "disks": disks,
  "hypervisor": instance.hypervisor,
  "network_port": instance.network_port,
  "hv_instance": instance.hvparams,
  "hv_actual": cluster.FillHV(instance, skip_globals=True),
  "be_instance": instance.beparams,
  "be_actual": cluster.FillBE(instance),
  "os_instance": instance.osparams,
  "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
  "serial_no": instance.serial_no,
  "mtime": instance.mtime,
  "ctime": instance.ctime,
  "uuid": instance.uuid,
  }

result[instance.name] = idict

return result
8411 class LUSetInstanceParams(LogicalUnit):
"""Modifies an instance's parameters.

"""
8415 HPATH = "instance-modify"
8416 HTYPE = constants.HTYPE_INSTANCE
8417 _OP_REQP = [("instance_name", _TNonEmptyString)]
_OP_DEFS = [
  ("nics", _EmptyList),
  ("disks", _EmptyList),
  ("beparams", _EmptyDict),
  ("hvparams", _EmptyDict),
  ("disk_template", None),
  ("remote_node", None),
  ("os_name", None),
  ("force_variant", False),
  ("osparams", None),
  ("force", False),
  ]
REQ_BGL = False
8432 def CheckArguments(self):
8433 if not (self.op.nics or self.op.disks or self.op.disk_template or
8434 self.op.hvparams or self.op.beparams or self.op.os_name):
8435 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8437 if self.op.hvparams:
8438 _CheckGlobalHvParams(self.op.hvparams)
disk_addremove = 0
for disk_op, disk_dict in self.op.disks:
  utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
  if disk_op == constants.DDM_REMOVE:
    disk_addremove += 1
    continue
  elif disk_op == constants.DDM_ADD:
    disk_addremove += 1
  else:
    if not isinstance(disk_op, int):
      raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
    if not isinstance(disk_dict, dict):
      msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8456 if disk_op == constants.DDM_ADD:
8457 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
if mode not in constants.DISK_ACCESS_SET:
  raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                             errors.ECODE_INVAL)
size = disk_dict.get('size', None)
if size is None:
  raise errors.OpPrereqError("Required disk parameter size missing",
                             errors.ECODE_INVAL)
try:
  size = int(size)
except (TypeError, ValueError), err:
  raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                             str(err), errors.ECODE_INVAL)
disk_dict['size'] = size
else:
  # modification of disk
  if 'size' in disk_dict:
8474 raise errors.OpPrereqError("Disk size change not possible, use"
8475 " grow-disk", errors.ECODE_INVAL)
8477 if disk_addremove > 1:
8478 raise errors.OpPrereqError("Only one disk add or remove operation"
8479 " supported at a time", errors.ECODE_INVAL)
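# Illustrative example of self.op.disks (values hypothetical):
#   [(constants.DDM_ADD, {"size": 1024, "mode": constants.DISK_RDWR}),
#    (0, {"mode": "ro"})]
# i.e. each entry is a DDM_* action or an existing disk index, plus a dict.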
8481 if self.op.disks and self.op.disk_template is not None:
raise errors.OpPrereqError("Disk template conversion and other disk"
                           " changes not supported at the same time",
                           errors.ECODE_INVAL)
8486 if self.op.disk_template:
8487 _CheckDiskTemplate(self.op.disk_template)
8488 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8489 self.op.remote_node is None):
raise errors.OpPrereqError("Changing the disk template to a mirrored"
                           " one requires specifying a secondary node",
                           errors.ECODE_INVAL)
nic_addremove = 0
for nic_op, nic_dict in self.op.nics:
  utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
  if nic_op == constants.DDM_REMOVE:
    nic_addremove += 1
    continue
  elif nic_op == constants.DDM_ADD:
    nic_addremove += 1
  else:
    if not isinstance(nic_op, int):
      raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
    if not isinstance(nic_dict, dict):
      msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8510 # nic_dict should be a dict
8511 nic_ip = nic_dict.get('ip', None)
8512 if nic_ip is not None:
8513 if nic_ip.lower() == constants.VALUE_NONE:
nic_dict['ip'] = None
else:
  if not utils.IsValidIP(nic_ip):
    raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                               errors.ECODE_INVAL)
8520 nic_bridge = nic_dict.get('bridge', None)
8521 nic_link = nic_dict.get('link', None)
8522 if nic_bridge and nic_link:
8523 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8524 " at the same time", errors.ECODE_INVAL)
8525 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8526 nic_dict['bridge'] = None
8527 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8528 nic_dict['link'] = None
8530 if nic_op == constants.DDM_ADD:
nic_mac = nic_dict.get('mac', None)
if nic_mac is None:
  nic_dict['mac'] = constants.VALUE_AUTO

if 'mac' in nic_dict:
  nic_mac = nic_dict['mac']
  if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
    nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8540 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                           " modifying an existing nic",
                           errors.ECODE_INVAL)
8545 if nic_addremove > 1:
8546 raise errors.OpPrereqError("Only one NIC add or remove operation"
8547 " supported at a time", errors.ECODE_INVAL)
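# Illustrative example of self.op.nics (values hypothetical):
#   [(constants.DDM_ADD, {"mac": constants.VALUE_AUTO}),
#    (0, {"ip": "198.51.100.10"})]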
8549 def ExpandNames(self):
8550 self._ExpandAndLockInstance()
8551 self.needed_locks[locking.LEVEL_NODE] = []
8552 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8554 def DeclareLocks(self, level):
8555 if level == locking.LEVEL_NODE:
8556 self._LockInstancesNodes()
8557 if self.op.disk_template and self.op.remote_node:
8558 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8559 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
def BuildHooksEnv(self):
  """Build hooks env.

  This runs on the master, primary and secondaries.

  """
  args = dict()
  if constants.BE_MEMORY in self.be_new:
    args['memory'] = self.be_new[constants.BE_MEMORY]
  if constants.BE_VCPUS in self.be_new:
    args['vcpus'] = self.be_new[constants.BE_VCPUS]
  # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
  # information at all.
  if self.op.nics:
    args['nics'] = []
    nic_override = dict(self.op.nics)
8577 for idx, nic in enumerate(self.instance.nics):
8578 if idx in nic_override:
8579 this_nic_override = nic_override[idx]
8581 this_nic_override = {}
if 'ip' in this_nic_override:
  ip = this_nic_override['ip']
else:
  ip = nic.ip
if 'mac' in this_nic_override:
  mac = this_nic_override['mac']
else:
  mac = nic.mac
8590 if idx in self.nic_pnew:
8591 nicparams = self.nic_pnew[idx]
8593 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8594 mode = nicparams[constants.NIC_MODE]
8595 link = nicparams[constants.NIC_LINK]
8596 args['nics'].append((ip, mac, mode, link))
8597 if constants.DDM_ADD in nic_override:
8598 ip = nic_override[constants.DDM_ADD].get('ip', None)
8599 mac = nic_override[constants.DDM_ADD]['mac']
8600 nicparams = self.nic_pnew[constants.DDM_ADD]
8601 mode = nicparams[constants.NIC_MODE]
8602 link = nicparams[constants.NIC_LINK]
8603 args['nics'].append((ip, mac, mode, link))
8604 elif constants.DDM_REMOVE in nic_override:
8605 del args['nics'][-1]
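# Note: DDM_REMOVE always drops the last NIC entry, which matches Exec
# below, where only the last NIC can be removed.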
8607 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8608 if self.op.disk_template:
8609 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return env, nl, nl
8613 def CheckPrereq(self):
8614 """Check prerequisites.
This only checks the instance list against the existing names.

"""
8619 # checking the new params on the primary/secondary nodes
8621 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8622 cluster = self.cluster = self.cfg.GetClusterInfo()
8623 assert self.instance is not None, \
8624 "Cannot retrieve locked instance %s" % self.op.instance_name
8625 pnode = instance.primary_node
8626 nodelist = list(instance.all_nodes)
8629 if self.op.os_name and not self.op.force:
8630 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8631 self.op.force_variant)
8632 instance_os = self.op.os_name
else:
  instance_os = instance.os
8636 if self.op.disk_template:
8637 if instance.disk_template == self.op.disk_template:
8638 raise errors.OpPrereqError("Instance already has disk template %s" %
8639 instance.disk_template, errors.ECODE_INVAL)
8641 if (instance.disk_template,
8642 self.op.disk_template) not in self._DISK_CONVERSIONS:
raise errors.OpPrereqError("Unsupported disk template conversion from"
                           " %s to %s" % (instance.disk_template,
                                          self.op.disk_template),
                           errors.ECODE_INVAL)
8647 if self.op.disk_template in constants.DTS_NET_MIRROR:
8648 _CheckNodeOnline(self, self.op.remote_node)
8649 _CheckNodeNotDrained(self, self.op.remote_node)
8650 disks = [{"size": d.size} for d in instance.disks]
8651 required = _ComputeDiskSize(self.op.disk_template, disks)
8652 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8653 _CheckInstanceDown(self, instance, "cannot change disk template")
8655 # hvparams processing
8656 if self.op.hvparams:
8657 hv_type = instance.hypervisor
8658 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8659 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8660 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8663 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8664 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8665 self.hv_new = hv_new # the new actual values
8666 self.hv_inst = i_hvdict # the new dict (without defaults)
else:
  self.hv_new = self.hv_inst = {}
8670 # beparams processing
8671 if self.op.beparams:
i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                             use_none=True)
8674 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8675 be_new = cluster.SimpleFillBE(i_bedict)
8676 self.be_new = be_new # the new actual values
8677 self.be_inst = i_bedict # the new dict (without defaults)
else:
  self.be_new = self.be_inst = {}
8681 # osparams processing
8682 if self.op.osparams:
8683 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8684 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8685 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8686 self.os_inst = i_osdict # the new dict (without defaults)
else:
  self.os_new = self.os_inst = {}
self.warn = []

if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8693 mem_check_list = [pnode]
8694 if be_new[constants.BE_AUTO_BALANCE]:
8695 # either we changed auto_balance to yes or it was from before
8696 mem_check_list.extend(instance.secondary_nodes)
8697 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8698 instance.hypervisor)
8699 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8700 instance.hypervisor)
8701 pninfo = nodeinfo[pnode]
msg = pninfo.fail_msg
if msg:
  # Assume the primary node is unreachable and go ahead
  self.warn.append("Can't get info from primary node %s: %s" %
                   (pnode, msg))
elif not isinstance(pninfo.payload.get('memory_free', None), int):
8708 self.warn.append("Node data from primary node %s doesn't contain"
8709 " free memory information" % pnode)
8710 elif instance_info.fail_msg:
8711 self.warn.append("Can't get instance runtime information: %s" %
8712 instance_info.fail_msg)
else:
  if instance_info.payload:
    current_mem = int(instance_info.payload['memory'])
  else:
    # Assume instance not running
    # (there is a slight race condition here, but it's not very probable,
    # and we have no other way to check)
    current_mem = 0
  miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
              pninfo.payload['memory_free'])
  if miss_mem > 0:
    raise errors.OpPrereqError("This change will prevent the instance"
                               " from starting, due to %d MB of memory"
                               " missing on its primary node" % miss_mem,
                               errors.ECODE_NORES)
8729 if be_new[constants.BE_AUTO_BALANCE]:
8730 for node, nres in nodeinfo.items():
if node not in instance.secondary_nodes:
  continue
msg = nres.fail_msg
if msg:
  self.warn.append("Can't get info from secondary node %s: %s" %
                   (node, msg))
8737 elif not isinstance(nres.payload.get('memory_free', None), int):
8738 self.warn.append("Secondary node %s didn't return free"
8739 " memory information" % node)
8740 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8741 self.warn.append("Not enough memory to failover instance to"
8742 " secondary node %s" % node)
# NIC processing
self.nic_pnew = {}
self.nic_pinst = {}
for nic_op, nic_dict in self.op.nics:
8748 if nic_op == constants.DDM_REMOVE:
8749 if not instance.nics:
raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                           errors.ECODE_INVAL)
continue
if nic_op != constants.DDM_ADD:
  # an existing nic
  if not instance.nics:
raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                           " no NICs" % nic_op,
                           errors.ECODE_INVAL)
if nic_op < 0 or nic_op >= len(instance.nics):
  raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                             " are 0 to %d" %
                             (nic_op, len(instance.nics) - 1),
                             errors.ECODE_INVAL)
8764 old_nic_params = instance.nics[nic_op].nicparams
old_nic_ip = instance.nics[nic_op].ip
else:
  old_nic_params = cluster.nicparams[constants.PP_DEFAULT]
  old_nic_ip = None
8770 update_params_dict = dict([(key, nic_dict[key])
8771 for key in constants.NICS_PARAMETERS
8772 if key in nic_dict])
8774 if 'bridge' in nic_dict:
8775 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
new_nic_params = _GetUpdatedParams(old_nic_params,
                                   update_params_dict)
utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8780 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8781 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8782 self.nic_pinst[nic_op] = new_nic_params
8783 self.nic_pnew[nic_op] = new_filled_nic_params
8784 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8786 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8787 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
if msg:
  msg = "Error checking bridges on node %s: %s" % (pnode, msg)
  if self.op.force:
    self.warn.append(msg)
  else:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8795 if new_nic_mode == constants.NIC_MODE_ROUTED:
if 'ip' in nic_dict:
  nic_ip = nic_dict['ip']
else:
  nic_ip = old_nic_ip
if nic_ip is None:
  raise errors.OpPrereqError('Cannot set the nic ip to None'
                             ' on a routed nic', errors.ECODE_INVAL)
if 'mac' in nic_dict:
  nic_mac = nic_dict['mac']
  if nic_mac is None:
    raise errors.OpPrereqError('Cannot set the nic mac to None',
                               errors.ECODE_INVAL)
8808 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8809 # otherwise generate the mac
8810 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
else:
  # or validate/reserve the current one
  try:
    self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8815 except errors.ReservationError:
8816 raise errors.OpPrereqError("MAC address %s already in use"
8817 " in cluster" % nic_mac,
8818 errors.ECODE_NOTUNIQUE)
# DISK processing
if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
  raise errors.OpPrereqError("Disk operations not supported for"
                             " diskless instances",
                             errors.ECODE_INVAL)
8825 for disk_op, _ in self.op.disks:
8826 if disk_op == constants.DDM_REMOVE:
8827 if len(instance.disks) == 1:
8828 raise errors.OpPrereqError("Cannot remove the last disk of"
8829 " an instance", errors.ECODE_INVAL)
8830 _CheckInstanceDown(self, instance, "cannot remove disks")
if (disk_op == constants.DDM_ADD and
    len(instance.disks) >= constants.MAX_DISKS):
raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                           " add more" % constants.MAX_DISKS,
                           errors.ECODE_STATE)
if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
  # an existing disk
  if disk_op < 0 or disk_op >= len(instance.disks):
    raise errors.OpPrereqError("Invalid disk index %s, valid values"
                               " are 0 to %d" %
                               (disk_op, len(instance.disks)),
                               errors.ECODE_INVAL)
8847 def _ConvertPlainToDrbd(self, feedback_fn):
"""Converts an instance from plain to drbd.

"""
feedback_fn("Converting template to drbd")
8852 instance = self.instance
8853 pnode = instance.primary_node
8854 snode = self.op.remote_node
8856 # create a fake disk info for _GenerateDiskTemplate
8857 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8858 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8859 instance.name, pnode, [snode],
8860 disk_info, None, None, 0)
8861 info = _GetInstanceInfoText(instance)
feedback_fn("Creating additional volumes...")
8863 # first, create the missing data and meta devices
8864 for disk in new_disks:
8865 # unfortunately this is... not too nice
_CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                      info, True)
for child in disk.children:
8869 _CreateSingleBlockDev(self, snode, instance, child, info, True)
# at this stage, all new LVs have been created, we can rename the
# old ones
feedback_fn("Renaming original volumes...")
8873 rename_list = [(o, n.children[0].logical_id)
8874 for (o, n) in zip(instance.disks, new_disks)]
8875 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8876 result.Raise("Failed to rename original LVs")
8878 feedback_fn("Initializing DRBD devices...")
8879 # all child devices are in place, we can now create the DRBD devices
8880 for disk in new_disks:
8881 for node in [pnode, snode]:
8882 f_create = node == pnode
8883 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8885 # at this point, the instance has been modified
8886 instance.disk_template = constants.DT_DRBD8
8887 instance.disks = new_disks
8888 self.cfg.Update(instance, feedback_fn)
8890 # disks are created, waiting for sync
disk_abort = not _WaitForSync(self, instance)
if disk_abort:
  raise errors.OpExecError("There are some degraded disks for"
                           " this instance, please cleanup manually")
8896 def _ConvertDrbdToPlain(self, feedback_fn):
"""Converts an instance from drbd to plain.

"""
instance = self.instance
8901 assert len(instance.secondary_nodes) == 1
8902 pnode = instance.primary_node
8903 snode = instance.secondary_nodes[0]
8904 feedback_fn("Converting template to plain")
8906 old_disks = instance.disks
8907 new_disks = [d.children[0] for d in old_disks]
8909 # copy over size and mode
8910 for parent, child in zip(old_disks, new_disks):
8911 child.size = parent.size
8912 child.mode = parent.mode
8914 # update instance structure
8915 instance.disks = new_disks
8916 instance.disk_template = constants.DT_PLAIN
8917 self.cfg.Update(instance, feedback_fn)
8919 feedback_fn("Removing volumes on the secondary node...")
8920 for disk in old_disks:
8921 self.cfg.SetDiskID(disk, snode)
msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
if msg:
  self.LogWarning("Could not remove block device %s on node %s,"
                  " continuing anyway: %s", disk.iv_name, snode, msg)
8927 feedback_fn("Removing unneeded volumes on the primary node...")
8928 for idx, disk in enumerate(old_disks):
8929 meta = disk.children[1]
8930 self.cfg.SetDiskID(meta, pnode)
msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
if msg:
  self.LogWarning("Could not remove metadata for disk %d on node %s,"
                  " continuing anyway: %s", idx, pnode, msg)
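# Note: the conversion keeps only each disk's data LV (children[0]); the
# DRBD metadata LV (children[1]) is removed as no longer needed.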
8937 def Exec(self, feedback_fn):
"""Modifies an instance.

All parameters take effect only at the next restart of the instance.

"""
8943 # Process here the warnings from CheckPrereq, as we don't have a
8944 # feedback_fn there.
8945 for warn in self.warn:
feedback_fn("WARNING: %s" % warn)

result = []
instance = self.instance
# disk changes
for disk_op, disk_dict in self.op.disks:
8952 if disk_op == constants.DDM_REMOVE:
8953 # remove the last disk
8954 device = instance.disks.pop()
8955 device_idx = len(instance.disks)
8956 for node, disk in device.ComputeNodeTree(instance.primary_node):
8957 self.cfg.SetDiskID(disk, node)
msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
if msg:
  self.LogWarning("Could not remove disk/%d on node %s: %s,"
                  " continuing anyway", device_idx, node, msg)
8962 result.append(("disk/%d" % device_idx, "remove"))
elif disk_op == constants.DDM_ADD:
  # add a new disk
  if instance.disk_template == constants.DT_FILE:
    file_driver, file_path = instance.disks[0].logical_id
    file_path = os.path.dirname(file_path)
  else:
    file_driver = file_path = None
8970 disk_idx_base = len(instance.disks)
8971 new_disk = _GenerateDiskTemplate(self,
8972 instance.disk_template,
8973 instance.name, instance.primary_node,
instance.secondary_nodes,
[disk_dict],
file_path,
file_driver,
disk_idx_base)[0]
instance.disks.append(new_disk)
8980 info = _GetInstanceInfoText(instance)
8982 logging.info("Creating volume %s for instance %s",
8983 new_disk.iv_name, instance.name)
8984 # Note: this needs to be kept in sync with _CreateDisks
8986 for node in instance.all_nodes:
8987 f_create = node == instance.primary_node
8989 _CreateBlockDev(self, node, instance, new_disk,
8990 f_create, info, f_create)
8991 except errors.OpExecError, err:
self.LogWarning("Failed to create volume %s (%s) on"
                " node %s: %s",
                new_disk.iv_name, new_disk, node, err)
8995 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8996 (new_disk.size, new_disk.mode)))
else:
  # change a given disk
  instance.disks[disk_op].mode = disk_dict['mode']
9000 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9002 if self.op.disk_template:
r_shut = _ShutdownInstanceDisks(self, instance)
if not r_shut:
  raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                           " proceed with disk template conversion")
9007 mode = (instance.disk_template, self.op.disk_template)
try:
  self._DISK_CONVERSIONS[mode](self, feedback_fn)
except:
  self.cfg.ReleaseDRBDMinors(instance.name)
  raise
9013 result.append(("disk_template", self.op.disk_template))
9016 for nic_op, nic_dict in self.op.nics:
9017 if nic_op == constants.DDM_REMOVE:
9018 # remove the last nic
9019 del instance.nics[-1]
9020 result.append(("nic.%d" % len(instance.nics), "remove"))
9021 elif nic_op == constants.DDM_ADD:
9022 # mac and bridge should be set, by now
9023 mac = nic_dict['mac']
9024 ip = nic_dict.get('ip', None)
9025 nicparams = self.nic_pinst[constants.DDM_ADD]
9026 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9027 instance.nics.append(new_nic)
9028 result.append(("nic.%d" % (len(instance.nics) - 1),
9029 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9030 (new_nic.mac, new_nic.ip,
9031 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
)))
else:
  for key in 'mac', 'ip':
    if key in nic_dict:
      setattr(instance.nics[nic_op], key, nic_dict[key])
9038 if nic_op in self.nic_pinst:
9039 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9040 for key, val in nic_dict.iteritems():
9041 result.append(("nic.%s/%d" % (key, nic_op), val))
9044 if self.op.hvparams:
9045 instance.hvparams = self.hv_inst
9046 for key, val in self.op.hvparams.iteritems():
9047 result.append(("hv/%s" % key, val))
9050 if self.op.beparams:
9051 instance.beparams = self.be_inst
9052 for key, val in self.op.beparams.iteritems():
9053 result.append(("be/%s" % key, val))
# OS change
if self.op.os_name:
  instance.os = self.op.os_name
9060 if self.op.osparams:
9061 instance.osparams = self.os_inst
9062 for key, val in self.op.osparams.iteritems():
9063 result.append(("os/%s" % key, val))
self.cfg.Update(instance, feedback_fn)

return result
9069 _DISK_CONVERSIONS = {
9070 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
(constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
}
9075 class LUQueryExports(NoHooksLU):
"""Query the exports list

"""
_OP_REQP = [("nodes", _TListOf(_TNonEmptyString))]
REQ_BGL = False
9082 def ExpandNames(self):
9083 self.needed_locks = {}
9084 self.share_locks[locking.LEVEL_NODE] = 1
9085 if not self.op.nodes:
9086 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9088 self.needed_locks[locking.LEVEL_NODE] = \
9089 _GetWantedNodes(self, self.op.nodes)
9091 def Exec(self, feedback_fn):
"""Compute the list of all the exported system images.

@rtype: dict
@return: a dictionary with the structure node->(export-list)
    where export-list is a list of the instances exported on
    that node.

"""
self.nodes = self.acquired_locks[locking.LEVEL_NODE]
rpcresult = self.rpc.call_export_list(self.nodes)
result = {}
for node in rpcresult:
  if rpcresult[node].fail_msg:
    result[node] = False
  else:
    result[node] = rpcresult[node].payload

return result
9112 class LUPrepareExport(NoHooksLU):
"""Prepares an instance for an export and returns useful information.

"""
_OP_REQP = [
  ("instance_name", _TNonEmptyString),
  ("mode", _TElemOf(constants.EXPORT_MODES)),
  ]
REQ_BGL = False
9122 def ExpandNames(self):
9123 self._ExpandAndLockInstance()
9125 def CheckPrereq(self):
"""Check prerequisites.

"""
instance_name = self.op.instance_name
9131 self.instance = self.cfg.GetInstanceInfo(instance_name)
9132 assert self.instance is not None, \
9133 "Cannot retrieve locked instance %s" % self.op.instance_name
9134 _CheckNodeOnline(self, self.instance.primary_node)
9136 self._cds = _GetClusterDomainSecret()
9138 def Exec(self, feedback_fn):
"""Prepares an instance for an export.

"""
instance = self.instance
9144 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9145 salt = utils.GenerateSecret(8)
9147 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9148 result = self.rpc.call_x509_cert_create(instance.primary_node,
9149 constants.RIE_CERT_VALIDITY)
9150 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9152 (name, cert_pem) = result.payload
cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                       cert_pem)

return {
  "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
  "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                    salt),
  "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
  }

return None
9167 class LUExportInstance(LogicalUnit):
"""Export an instance to an image in the cluster.

"""
HPATH = "instance-export"
HTYPE = constants.HTYPE_INSTANCE
_OP_REQP = [
  ("instance_name", _TNonEmptyString),
  ("target_node", _TNonEmptyString),
  ("shutdown", _TBool),
  ("mode", _TElemOf(constants.EXPORT_MODES)),
  ]
_OP_DEFS = [
  ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
  ("remove_instance", False),
  ("ignore_remove_failures", False),
  ("mode", constants.EXPORT_MODE_LOCAL),
  ("x509_key_name", None),
  ("destination_x509_ca", None),
  ]
REQ_BGL = False
9189 def CheckArguments(self):
"""Check the arguments.

"""
self.x509_key_name = self.op.x509_key_name
9194 self.dest_x509_ca_pem = self.op.destination_x509_ca
9196 if self.op.remove_instance and not self.op.shutdown:
raise errors.OpPrereqError("Can not remove instance without shutting it"
                           " down before")
9200 if self.op.mode == constants.EXPORT_MODE_REMOTE:
if not self.x509_key_name:
  raise errors.OpPrereqError("Missing X509 key name for encryption",
                             errors.ECODE_INVAL)

if not self.dest_x509_ca_pem:
  raise errors.OpPrereqError("Missing destination X509 CA",
                             errors.ECODE_INVAL)
9209 def ExpandNames(self):
9210 self._ExpandAndLockInstance()
9212 # Lock all nodes for local exports
9213 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9214 # FIXME: lock only instance primary and destination node
# Sad but true, for now we have to lock all nodes, as we don't know where
9217 # the previous export might be, and in this LU we search for it and
9218 # remove it from its current node. In the future we could fix this by:
9219 # - making a tasklet to search (share-lock all), then create the
9220 # new one, then one to remove, after
9221 # - removing the removal operation altogether
9222 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9224 def DeclareLocks(self, level):
9225 """Last minute lock declaration."""
9226 # All nodes are locked anyway, so nothing to do here.
def BuildHooksEnv(self):
  """Build hooks env.

  This will run on the master, primary node and target node.

  """
  env = {
    "EXPORT_MODE": self.op.mode,
9236 "EXPORT_NODE": self.op.target_node,
9237 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9238 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9239 # TODO: Generic function for boolean env variables
"REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
}

env.update(_BuildInstanceHookEnvByObject(self, self.instance))

nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9247 if self.op.mode == constants.EXPORT_MODE_LOCAL:
nl.append(self.op.target_node)

return env, nl, nl
9252 def CheckPrereq(self):
9253 """Check prerequisites.
This checks that the instance and node names are valid.

"""
9258 instance_name = self.op.instance_name
9260 self.instance = self.cfg.GetInstanceInfo(instance_name)
9261 assert self.instance is not None, \
9262 "Cannot retrieve locked instance %s" % self.op.instance_name
9263 _CheckNodeOnline(self, self.instance.primary_node)
9265 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9266 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9267 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9268 assert self.dst_node is not None
9270 _CheckNodeOnline(self, self.dst_node.name)
_CheckNodeNotDrained(self, self.dst_node.name)

self._cds = None
self.dest_disk_info = None
self.dest_x509_ca = None
9277 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9278 self.dst_node = None
9280 if len(self.op.target_node) != len(self.instance.disks):
raise errors.OpPrereqError(("Received destination information for %s"
                            " disks, but instance %s has %s disks") %
                           (len(self.op.target_node), instance_name,
                            len(self.instance.disks)),
                           errors.ECODE_INVAL)
9287 cds = _GetClusterDomainSecret()
# Check X509 key name
try:
  (key_name, hmac_digest, hmac_salt) = self.x509_key_name
except (TypeError, ValueError), err:
  raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
  raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                             errors.ECODE_INVAL)
# Load and verify CA
try:
  (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9302 except OpenSSL.crypto.Error, err:
9303 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9304 (err, ), errors.ECODE_INVAL)
9306 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9307 if errcode is not None:
9308 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9309 (msg, ), errors.ECODE_INVAL)
9311 self.dest_x509_ca = cert
# Verify target information
disk_info = []
for idx, disk_data in enumerate(self.op.target_node):
  try:
    (host, port, magic) = \
      masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9319 except errors.GenericError, err:
9320 raise errors.OpPrereqError("Target info for disk %s: %s" %
9321 (idx, err), errors.ECODE_INVAL)
9323 disk_info.append((host, port, magic))
9325 assert len(disk_info) == len(self.op.target_node)
self.dest_disk_info = disk_info

else:
  raise errors.ProgrammerError("Unhandled export mode %r" %
                               self.op.mode)
9332 # instance disk type verification
9333 # TODO: Implement export support for file-based disks
9334 for disk in self.instance.disks:
9335 if disk.dev_type == constants.LD_FILE:
9336 raise errors.OpPrereqError("Export not supported for instances with"
9337 " file-based disks", errors.ECODE_INVAL)
9339 def _CleanupExports(self, feedback_fn):
9340 """Removes exports of current instance from all other nodes.
9342 If an instance in a cluster with nodes A..D was exported to node C, its
exports will be removed from the nodes A, B and D.

"""
9346 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9348 nodelist = self.cfg.GetNodeList()
9349 nodelist.remove(self.dst_node.name)
9351 # on one-node clusters nodelist will be empty after the removal
9352 # if we proceed the backup would be removed because OpQueryExports
9353 # substitutes an empty list with the full cluster node list.
iname = self.instance.name
if nodelist:
  feedback_fn("Removing old exports for instance %s" % iname)
  exportlist = self.rpc.call_export_list(nodelist)
  for node in exportlist:
    if exportlist[node].fail_msg:
      continue
    if iname in exportlist[node].payload:
      msg = self.rpc.call_export_remove(node, iname).fail_msg
      if msg:
        self.LogWarning("Could not remove older export for instance %s"
                        " on node %s: %s", iname, node, msg)
9367 def Exec(self, feedback_fn):
"""Export an instance to an image in the cluster.

"""
assert self.op.mode in constants.EXPORT_MODES
9373 instance = self.instance
9374 src_node = instance.primary_node
9376 if self.op.shutdown:
9377 # shutdown the instance, but not the disks
9378 feedback_fn("Shutting down instance %s" % instance.name)
9379 result = self.rpc.call_instance_shutdown(src_node, instance,
9380 self.op.shutdown_timeout)
9381 # TODO: Maybe ignore failures if ignore_remove_failures is set
9382 result.Raise("Could not shutdown instance %s on"
9383 " node %s" % (instance.name, src_node))
9385 # set the disks ID correctly since call_instance_start needs the
9386 # correct drbd minor to create the symlinks
9387 for disk in instance.disks:
9388 self.cfg.SetDiskID(disk, src_node)
activate_disks = (not instance.admin_up)

if activate_disks:
  # Activate the instance disks if we're exporting a stopped instance
  feedback_fn("Activating disks for %s" % instance.name)
  _StartInstanceDisks(self, instance, None)
try:
  helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                 instance)

  helper.CreateSnapshots()
  try:
9403 if (self.op.shutdown and instance.admin_up and
9404 not self.op.remove_instance):
9405 assert not activate_disks
9406 feedback_fn("Starting instance %s" % instance.name)
9407 result = self.rpc.call_instance_start(src_node, instance, None, None)
msg = result.fail_msg
if msg:
  feedback_fn("Failed to start instance: %s" % msg)
  _ShutdownInstanceDisks(self, instance)
  raise errors.OpExecError("Could not start instance: %s" % msg)
9414 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9415 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9416 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9417 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9418 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
(key_name, _, _) = self.x509_key_name

dest_ca_pem = \
  OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                  self.dest_x509_ca)

(fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                           key_name, dest_ca_pem,
                                           timeouts)
finally:
  helper.Cleanup()
9432 # Check for backwards compatibility
9433 assert len(dresults) == len(instance.disks)
9434 assert compat.all(isinstance(i, bool) for i in dresults), \
9435 "Not all results are boolean: %r" % dresults
finally:
  if activate_disks:
    feedback_fn("Deactivating disks for %s" % instance.name)
    _ShutdownInstanceDisks(self, instance)
9442 # Remove instance if requested
9443 if self.op.remove_instance:
if not (compat.all(dresults) and fin_resu):
  feedback_fn("Not removing instance %s as parts of the export failed" %
              instance.name)
else:
  feedback_fn("Removing instance %s" % instance.name)
9449 _RemoveInstance(self, feedback_fn, instance,
9450 self.op.ignore_remove_failures)
9452 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9453 self._CleanupExports(feedback_fn)
9455 return fin_resu, dresults
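# Return format: (overall_success, per_disk_results), where
# per_disk_results is a list of booleans, one per instance disk (see the
# asserts above).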
9458 class LURemoveExport(NoHooksLU):
"""Remove exports related to the named instance.

"""
_OP_REQP = [("instance_name", _TNonEmptyString)]
REQ_BGL = False
9465 def ExpandNames(self):
9466 self.needed_locks = {}
9467 # We need all nodes to be locked in order for RemoveExport to work, but we
9468 # don't need to lock the instance itself, as nothing will happen to it (and
9469 # we can remove exports also for a removed instance)
9470 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9472 def Exec(self, feedback_fn):
9473 """Remove any export.
9476 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9477 # If the instance was not found we'll try with the name that was passed in.
9478 # This will only work if it was an FQDN, though.
9480 if not instance_name:
9482 instance_name = self.op.instance_name
9484 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9485 exportlist = self.rpc.call_export_list(locked_nodes)
9487 for node in exportlist:
9488 msg = exportlist[node].fail_msg
9490 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9492 if instance_name in exportlist[node].payload:
9494 result = self.rpc.call_export_remove(node, instance_name)
9495 msg = result.fail_msg
9497 logging.error("Could not remove export for instance %s"
9498 " on node %s: %s", instance_name, node, msg)
9500 if fqdn_warn and not found:
9501 feedback_fn("Export not found. If trying to remove an export belonging"
9502 " to a deleted instance please use its Fully Qualified"


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNonEmptyString),
    ]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
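
# Example (hypothetical values): for an opcode with kind=constants.TAG_NODE
# and name="node1.example.com", Exec returns the node's tags as a plain
# list, e.g. ["rack:r1", "power:a"].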


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = [("pattern", _TNonEmptyString)]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
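
# Example (hypothetical values): for the pattern "^rack:", Exec returns
# (path, tag) pairs such as:
#   [("/nodes/node1.example.com", "rack:r1"),
#    ("/nodes/node2.example.com", "rack:r2")]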


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNonEmptyString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = [
    ("kind", _TElemOf(constants.VALID_TAG_TYPES)),
    ("name", _TNonEmptyString),
    ("tags", _TListOf(objects.TaggableObject.ValidateTag)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = [
    ("duration", _TFloat),
    ("on_master", _TBool),
    ("on_nodes", _TListOf(_TNonEmptyString)),
    ("repeat", _TPositiveInt)
    ]
  _OP_DEFS = [
    ("repeat", 0),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
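
# Example use (hypothetical values): this LU is driven through the job queue
# by the corresponding opcode; e.g. an opcode built with duration=10.0,
# on_master=True, on_nodes=[] and repeat=2 sleeps twice for ten seconds on
# the master only.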


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
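
  # Sketch (hypothetical values): after _BuildInputData, self.in_text holds
  # the serialized document handed to the external script, i.e. the cluster
  # data plus a "request" section such as:
  #   {..., "request": {"type": "relocate", "name": "inst1.example.com",
  #                     "disk_space_total": 1152, "required_nodes": 1,
  #                     "relocate_from": ["node2.example.com"]}}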

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
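
  # Example use (hypothetical; "hail" is just an example script name),
  # mirroring how LUs in this module drive the allocator:
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance.name,
  #                    relocate_from=[instance.primary_node])
  #   ial.Run("hail")
  #   if not ial.success:
  #     raise errors.OpExecError("Allocator failed: %s" % ial.info)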

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict
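
# Example (hypothetical values): a well-formed allocator reply, as validated
# above, is a dict carrying exactly these mandatory keys:
#   {"success": True, "info": "allocation successful",
#    "result": ["node2.example.com", "node3.example.com"]}
# Replies carrying only the legacy "nodes" key are remapped to "result".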


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = [
    ("direction", _TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", _TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", _TNonEmptyString),
    ("nics", _TOr(_TNone, _TListOf(
      _TDictOf(_TElemOf(["mac", "ip", "bridge"]),
               _TOr(_TNone, _TNonEmptyString))))),
    ("disks", _TOr(_TNone, _TList)),
    ]
  _OP_DEFS = [
    ("hypervisor", None),
    ("allocator", None),
    ("nics", None),
    ("disks", None),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result